The following changes since commit d530697ca20e19f7a626f4c1c8b26fccd0dc4470:

  Merge tag 'pull-testing-updates-100523-1' of https://gitlab.com/stsquad/qemu into staging (2023-05-10 16:43:01 +0100)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230511

for you to fetch changes up to b2d4d6616c22325dff802e0a35092167f2dc2268:

  target/loongarch: Do not include tcg-ldst.h (2023-05-11 06:06:04 +0100)

----------------------------------------------------------------
target/m68k: Fix gen_load_fp regression
accel/tcg: Ensure fairness with icount
disas: Move disas.c into the target-independent source sets
tcg: Use common routines for calling slow path helpers
tcg/*: Cleanups to qemu_ld/st constraints
tcg: Remove TARGET_ALIGNED_ONLY
accel/tcg: Reorg system mode load/store helpers

----------------------------------------------------------------
Jamie Iles (2):
      cpu: expose qemu_cpu_list_lock for lock-guard use
      accel/tcg/tcg-accel-ops-rr: ensure fairness with icount

Richard Henderson (49):
      target/m68k: Fix gen_load_fp for OS_LONG
      accel/tcg: Fix atomic_mmu_lookup for reads
      disas: Fix tabs and braces in disas.c
      disas: Move disas.c to disas/
      disas: Remove target_ulong from the interface
      disas: Remove target-specific headers
      tcg/i386: Introduce prepare_host_addr
      tcg/i386: Use indexed addressing for softmmu fast path
      tcg/aarch64: Introduce prepare_host_addr
      tcg/arm: Introduce prepare_host_addr
      tcg/loongarch64: Introduce prepare_host_addr
      tcg/mips: Introduce prepare_host_addr
      tcg/ppc: Introduce prepare_host_addr
      tcg/riscv: Introduce prepare_host_addr
      tcg/s390x: Introduce prepare_host_addr
      tcg: Add routines for calling slow-path helpers
      tcg/i386: Convert tcg_out_qemu_ld_slow_path
      tcg/i386: Convert tcg_out_qemu_st_slow_path
      tcg/aarch64: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/arm: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/loongarch64: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/mips: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/ppc: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/riscv: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/s390x: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/loongarch64: Simplify constraints on qemu_ld/st
      tcg/mips: Remove MO_BSWAP handling
      tcg/mips: Reorg tlb load within prepare_host_addr
      tcg/mips: Simplify constraints on qemu_ld/st
      tcg/ppc: Reorg tcg_out_tlb_read
      tcg/ppc: Adjust constraints on qemu_ld/st
      tcg/ppc: Remove unused constraints A, B, C, D
      tcg/ppc: Remove unused constraint J
      tcg/riscv: Simplify constraints on qemu_ld/st
      tcg/s390x: Use ALGFR in constructing softmmu host address
      tcg/s390x: Simplify constraints on qemu_ld/st
      target/mips: Add MO_ALIGN to gen_llwp, gen_scwp
      target/mips: Add missing default_tcg_memop_mask
      target/mips: Use MO_ALIGN instead of 0
      target/mips: Remove TARGET_ALIGNED_ONLY
      target/nios2: Remove TARGET_ALIGNED_ONLY
      target/sh4: Use MO_ALIGN where required
      target/sh4: Remove TARGET_ALIGNED_ONLY
      tcg: Remove TARGET_ALIGNED_ONLY
      accel/tcg: Add cpu_in_serial_context
      accel/tcg: Introduce tlb_read_idx
      accel/tcg: Reorg system mode load helpers
      accel/tcg: Reorg system mode store helpers
      target/loongarch: Do not include tcg-ldst.h

Thomas Huth (2):
      disas: Move softmmu specific code to separate file
      disas: Move disas.c into the target-independent source set

 configs/targets/mips-linux-user.mak | 1 -
 configs/targets/mips-softmmu.mak | 1 -
 configs/targets/mips64-linux-user.mak | 1 -
 configs/targets/mips64-softmmu.mak | 1 -
 configs/targets/mips64el-linux-user.mak | 1 -
 configs/targets/mips64el-softmmu.mak | 1 -
 configs/targets/mipsel-linux-user.mak | 1 -
 configs/targets/mipsel-softmmu.mak | 1 -
 configs/targets/mipsn32-linux-user.mak | 1 -
 configs/targets/mipsn32el-linux-user.mak | 1 -
 configs/targets/nios2-softmmu.mak | 1 -
 configs/targets/sh4-linux-user.mak | 1 -
 configs/targets/sh4-softmmu.mak | 1 -
 configs/targets/sh4eb-linux-user.mak | 1 -
 configs/targets/sh4eb-softmmu.mak | 1 -
 meson.build | 3 -
 accel/tcg/internal.h | 9 +
 accel/tcg/tcg-accel-ops-icount.h | 3 +-
 disas/disas-internal.h | 21 +
 include/disas/disas.h | 23 +-
 include/exec/cpu-common.h | 1 +
 include/exec/cpu-defs.h | 7 +-
 include/exec/cpu_ldst.h | 26 +-
 include/exec/memop.h | 13 +-
 include/exec/poison.h | 1 -
 tcg/loongarch64/tcg-target-con-set.h | 2 -
 tcg/loongarch64/tcg-target-con-str.h | 1 -
 tcg/mips/tcg-target-con-set.h | 13 +-
 tcg/mips/tcg-target-con-str.h | 2 -
 tcg/mips/tcg-target.h | 4 +-
 tcg/ppc/tcg-target-con-set.h | 11 +-
 tcg/ppc/tcg-target-con-str.h | 7 -
 tcg/riscv/tcg-target-con-set.h | 2 -
 tcg/riscv/tcg-target-con-str.h | 1 -
 tcg/s390x/tcg-target-con-set.h | 2 -
 tcg/s390x/tcg-target-con-str.h | 1 -
 accel/tcg/cpu-exec-common.c | 3 +
 accel/tcg/cputlb.c | 1113 ++++++++++++++++-------------
 accel/tcg/tb-maint.c | 2 +-
 accel/tcg/tcg-accel-ops-icount.c | 21 +-
 accel/tcg/tcg-accel-ops-rr.c | 37 +-
 bsd-user/elfload.c | 5 +-
 cpus-common.c | 2 +-
 disas/disas-mon.c | 65 ++
 disas.c => disas/disas.c | 109 +--
 linux-user/elfload.c | 18 +-
 migration/dirtyrate.c | 26 +-
 replay/replay.c | 3 +-
 target/loongarch/csr_helper.c | 1 -
 target/loongarch/iocsr_helper.c | 1 -
 target/m68k/translate.c | 1 +
 target/mips/tcg/mxu_translate.c | 3 +-
 target/nios2/translate.c | 10 +
 target/sh4/translate.c | 102 ++-
 tcg/tcg.c | 480 ++++++++++++-
 trace/control-target.c | 9 +-
 target/mips/tcg/micromips_translate.c.inc | 24 +-
 target/mips/tcg/mips16e_translate.c.inc | 18 +-
 target/mips/tcg/nanomips_translate.c.inc | 32 +-
 tcg/aarch64/tcg-target.c.inc | 347 ++++-----
 tcg/arm/tcg-target.c.inc | 455 +++++-------
 tcg/i386/tcg-target.c.inc | 453 +++++-------
 tcg/loongarch64/tcg-target.c.inc | 313 +++-----
 tcg/mips/tcg-target.c.inc | 870 +++++++---------------
 tcg/ppc/tcg-target.c.inc | 512 ++++++-------
 tcg/riscv/tcg-target.c.inc | 304 ++++----
 tcg/s390x/tcg-target.c.inc | 314 ++++----
 disas/meson.build | 6 +-
 68 files changed, 2788 insertions(+), 3039 deletions(-)
 create mode 100644 disas/disas-internal.h
 create mode 100644 disas/disas-mon.c
 rename disas.c => disas/disas.c (79%)
New patch
Case was accidentally dropped in b7a94da9550b.

Tested-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/m68k/translate.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -XXX,XX +XXX,XX @@ static void gen_load_fp(DisasContext *s, int opsize, TCGv addr, TCGv_ptr fp,
 switch (opsize) {
 case OS_BYTE:
 case OS_WORD:
+ case OS_LONG:
 tcg_gen_qemu_ld_tl(tmp, addr, index, opsize | MO_SIGN | MO_TE);
 gen_helper_exts32(cpu_env, fp, tmp);
 break;
--
2.34.1
New patch
A copy-paste bug had us looking at the victim cache for writes.

Cc: qemu-stable@nongnu.org
Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Fixes: 08dff435e2 ("tcg: Probe the proper permissions for atomic ops")
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-Id: <20230505204049.352469-1-richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
 } else /* if (prot & PAGE_READ) */ {
 tlb_addr = tlbe->addr_read;
 if (!tlb_hit(tlb_addr, addr)) {
- if (!VICTIM_TLB_HIT(addr_write, addr)) {
+ if (!VICTIM_TLB_HIT(addr_read, addr)) {
 tlb_fill(env_cpu(env), addr, size,
 MMU_DATA_LOAD, mmu_idx, retaddr);
 index = tlb_index(env, mmu_idx, addr);
--
2.34.1
New patch
Fix these before moving the file, for checkpatch.pl.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230510170812.663149-1-richard.henderson@linaro.org>
---
 disas.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/disas.c b/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/disas.c
+++ b/disas.c
@@ -XXX,XX +XXX,XX @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code,
 }

 for (pc = code; size > 0; pc += count, size -= count) {
-    fprintf(out, "0x" TARGET_FMT_lx ": ", pc);
-    count = s.info.print_insn(pc, &s.info);
-    fprintf(out, "\n");
-    if (count < 0)
-     break;
+ fprintf(out, "0x" TARGET_FMT_lx ": ", pc);
+ count = s.info.print_insn(pc, &s.info);
+ fprintf(out, "\n");
+ if (count < 0) {
+ break;
+ }
 if (size < count) {
 fprintf(out,
 "Disassembler disagrees with translator over instruction "
--
2.34.1
New patch
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230503072331.1747057-80-richard.henderson@linaro.org>
---
 meson.build | 3 ---
 disas.c => disas/disas.c | 0
 disas/meson.build | 4 +++-
 3 files changed, 3 insertions(+), 4 deletions(-)
 rename disas.c => disas/disas.c (100%)

diff --git a/meson.build b/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/meson.build
+++ b/meson.build
@@ -XXX,XX +XXX,XX @@ specific_ss.add(files('cpu.c'))

 subdir('softmmu')

-common_ss.add(capstone)
-specific_ss.add(files('disas.c'), capstone)
-
 # Work around a gcc bug/misfeature wherein constant propagation looks
 # through an alias:
 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99696
diff --git a/disas.c b/disas/disas.c
similarity index 100%
rename from disas.c
rename to disas/disas.c
diff --git a/disas/meson.build b/disas/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/disas/meson.build
+++ b/disas/meson.build
@@ -XXX,XX +XXX,XX @@ common_ss.add(when: 'CONFIG_RISCV_DIS', if_true: files('riscv.c'))
 common_ss.add(when: 'CONFIG_SH4_DIS', if_true: files('sh4.c'))
 common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c'))
 common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c'))
-common_ss.add(when: capstone, if_true: files('capstone.c'))
+common_ss.add(when: capstone, if_true: [files('capstone.c'), capstone])
+
+specific_ss.add(files('disas.c'), capstone)
--
2.34.1
New patch
Use uint64_t for the pc, and size_t for the size.

Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230503072331.1747057-81-richard.henderson@linaro.org>
---
 include/disas/disas.h | 17 ++++++-----------
 bsd-user/elfload.c | 5 +++--
 disas/disas.c | 19 +++++++++----------
 linux-user/elfload.c | 5 +++--
 4 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/include/disas/disas.h b/include/disas/disas.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/disas/disas.h
16
+++ b/include/disas/disas.h
17
@@ -XXX,XX +XXX,XX @@
18
#include "cpu.h"
19
20
/* Disassemble this for me please... (debugging). */
21
-void disas(FILE *out, const void *code, unsigned long size);
22
-void target_disas(FILE *out, CPUState *cpu, target_ulong code,
23
- target_ulong size);
24
+void disas(FILE *out, const void *code, size_t size);
25
+void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size);
26
27
-void monitor_disas(Monitor *mon, CPUState *cpu,
28
- target_ulong pc, int nb_insn, int is_physical);
29
+void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
30
+ int nb_insn, bool is_physical);
31
32
char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size);
33
34
/* Look up symbol for debugging purpose. Returns "" if unknown. */
35
-const char *lookup_symbol(target_ulong orig_addr);
36
+const char *lookup_symbol(uint64_t orig_addr);
37
#endif
38
39
struct syminfo;
40
struct elf32_sym;
41
struct elf64_sym;
42
43
-#if defined(CONFIG_USER_ONLY)
44
-typedef const char *(*lookup_symbol_t)(struct syminfo *s, target_ulong orig_addr);
45
-#else
46
-typedef const char *(*lookup_symbol_t)(struct syminfo *s, hwaddr orig_addr);
47
-#endif
48
+typedef const char *(*lookup_symbol_t)(struct syminfo *s, uint64_t orig_addr);
49
50
struct syminfo {
51
lookup_symbol_t lookup_symbol;
52
diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/bsd-user/elfload.c
55
+++ b/bsd-user/elfload.c
56
@@ -XXX,XX +XXX,XX @@ static abi_ulong load_elf_interp(struct elfhdr *interp_elf_ex,
57
58
static int symfind(const void *s0, const void *s1)
59
{
60
- target_ulong addr = *(target_ulong *)s0;
61
+ __typeof(sym->st_value) addr = *(uint64_t *)s0;
62
struct elf_sym *sym = (struct elf_sym *)s1;
63
int result = 0;
64
+
65
if (addr < sym->st_value) {
66
result = -1;
67
} else if (addr >= sym->st_value + sym->st_size) {
68
@@ -XXX,XX +XXX,XX @@ static int symfind(const void *s0, const void *s1)
69
return result;
70
}
71
72
-static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr)
73
+static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr)
74
{
75
#if ELF_CLASS == ELFCLASS32
76
struct elf_sym *syms = s->disas_symtab.elf32;
77
diff --git a/disas/disas.c b/disas/disas.c
78
index XXXXXXX..XXXXXXX 100644
79
--- a/disas/disas.c
80
+++ b/disas/disas.c
81
@@ -XXX,XX +XXX,XX @@ static void initialize_debug_host(CPUDebug *s)
82
}
83
84
/* Disassemble this for me please... (debugging). */
85
-void target_disas(FILE *out, CPUState *cpu, target_ulong code,
86
- target_ulong size)
87
+void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size)
88
{
89
- target_ulong pc;
90
+ uint64_t pc;
91
int count;
92
CPUDebug s;
93
94
@@ -XXX,XX +XXX,XX @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code,
95
}
96
97
for (pc = code; size > 0; pc += count, size -= count) {
98
- fprintf(out, "0x" TARGET_FMT_lx ": ", pc);
99
+ fprintf(out, "0x%08" PRIx64 ": ", pc);
100
count = s.info.print_insn(pc, &s.info);
101
fprintf(out, "\n");
102
if (count < 0) {
103
@@ -XXX,XX +XXX,XX @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size)
104
}
105
106
/* Disassemble this for me please... (debugging). */
107
-void disas(FILE *out, const void *code, unsigned long size)
108
+void disas(FILE *out, const void *code, size_t size)
109
{
110
uintptr_t pc;
111
int count;
112
@@ -XXX,XX +XXX,XX @@ void disas(FILE *out, const void *code, unsigned long size)
113
}
114
115
/* Look up symbol for debugging purpose. Returns "" if unknown. */
116
-const char *lookup_symbol(target_ulong orig_addr)
117
+const char *lookup_symbol(uint64_t orig_addr)
118
{
119
const char *symbol = "";
120
struct syminfo *s;
121
@@ -XXX,XX +XXX,XX @@ physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
122
}
123
124
/* Disassembler for the monitor. */
125
-void monitor_disas(Monitor *mon, CPUState *cpu,
126
- target_ulong pc, int nb_insn, int is_physical)
127
+void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
128
+ int nb_insn, bool is_physical)
129
{
130
int count, i;
131
CPUDebug s;
132
@@ -XXX,XX +XXX,XX @@ void monitor_disas(Monitor *mon, CPUState *cpu,
133
}
134
135
if (!s.info.print_insn) {
136
- monitor_printf(mon, "0x" TARGET_FMT_lx
137
+ monitor_printf(mon, "0x%08" PRIx64
138
": Asm output not supported on this arch\n", pc);
139
return;
140
}
141
142
for (i = 0; i < nb_insn; i++) {
143
- g_string_append_printf(ds, "0x" TARGET_FMT_lx ": ", pc);
144
+ g_string_append_printf(ds, "0x%08" PRIx64 ": ", pc);
145
count = s.info.print_insn(pc, &s.info);
146
g_string_append_c(ds, '\n');
147
if (count < 0) {
148
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
149
index XXXXXXX..XXXXXXX 100644
150
--- a/linux-user/elfload.c
151
+++ b/linux-user/elfload.c
152
@@ -XXX,XX +XXX,XX @@ static void load_elf_interp(const char *filename, struct image_info *info,
153
154
static int symfind(const void *s0, const void *s1)
155
{
156
- target_ulong addr = *(target_ulong *)s0;
157
struct elf_sym *sym = (struct elf_sym *)s1;
158
+ __typeof(sym->st_value) addr = *(uint64_t *)s0;
159
int result = 0;
160
+
161
if (addr < sym->st_value) {
162
result = -1;
163
} else if (addr >= sym->st_value + sym->st_size) {
164
@@ -XXX,XX +XXX,XX @@ static int symfind(const void *s0, const void *s1)
165
return result;
166
}
167
168
-static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr)
169
+static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr)
170
{
171
#if ELF_CLASS == ELFCLASS32
172
struct elf_sym *syms = s->disas_symtab.elf32;
173
--
174
2.34.1
New patch
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230503072331.1747057-83-richard.henderson@linaro.org>
---
 include/disas/disas.h | 6 ------
 disas/disas.c | 3 ++-
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/include/disas/disas.h b/include/disas/disas.h
10
index XXXXXXX..XXXXXXX 100644
11
--- a/include/disas/disas.h
12
+++ b/include/disas/disas.h
13
@@ -XXX,XX +XXX,XX @@
14
#ifndef QEMU_DISAS_H
15
#define QEMU_DISAS_H
16
17
-#include "exec/hwaddr.h"
18
-
19
-#ifdef NEED_CPU_H
20
-#include "cpu.h"
21
-
22
/* Disassemble this for me please... (debugging). */
23
void disas(FILE *out, const void *code, size_t size);
24
void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size);
25
@@ -XXX,XX +XXX,XX @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size);
26
27
/* Look up symbol for debugging purpose. Returns "" if unknown. */
28
const char *lookup_symbol(uint64_t orig_addr);
29
-#endif
30
31
struct syminfo;
32
struct elf32_sym;
33
diff --git a/disas/disas.c b/disas/disas.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/disas/disas.c
36
+++ b/disas/disas.c
37
@@ -XXX,XX +XXX,XX @@
38
#include "disas/dis-asm.h"
39
#include "elf.h"
40
#include "qemu/qemu-print.h"
41
-
42
#include "disas/disas.h"
43
#include "disas/capstone.h"
44
+#include "hw/core/cpu.h"
45
+#include "exec/memory.h"
46
47
typedef struct CPUDebug {
48
struct disassemble_info info;
49
--
50
2.34.1
New patch
From: Thomas Huth <thuth@redhat.com>

We'd like to move disas.c into the common code source set, where
CONFIG_USER_ONLY is not available anymore. So we have to move
the related code into a separate file instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20230508133745.109463-2-thuth@redhat.com>
[rth: Type change done in a separate patch]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 disas/disas-internal.h | 21 ++++++++++++
 disas/disas-mon.c | 65 ++++++++++++++++++++++++++++++++++++
 disas/disas.c | 76 ++++--------------------------------------
 disas/meson.build | 1 +
 4 files changed, 93 insertions(+), 70 deletions(-)
 create mode 100644 disas/disas-internal.h
 create mode 100644 disas/disas-mon.c

diff --git a/disas/disas-internal.h b/disas/disas-internal.h
21
new file mode 100644
22
index XXXXXXX..XXXXXXX
23
--- /dev/null
24
+++ b/disas/disas-internal.h
25
@@ -XXX,XX +XXX,XX @@
26
+/*
27
+ * Definitions used internally in the disassembly code
28
+ *
29
+ * SPDX-License-Identifier: GPL-2.0-or-later
30
+ */
31
+
32
+#ifndef DISAS_INTERNAL_H
33
+#define DISAS_INTERNAL_H
34
+
35
+#include "disas/dis-asm.h"
36
+
37
+typedef struct CPUDebug {
38
+ struct disassemble_info info;
39
+ CPUState *cpu;
40
+} CPUDebug;
41
+
42
+void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu);
43
+int disas_gstring_printf(FILE *stream, const char *fmt, ...)
44
+ G_GNUC_PRINTF(2, 3);
45
+
46
+#endif
47
diff --git a/disas/disas-mon.c b/disas/disas-mon.c
48
new file mode 100644
49
index XXXXXXX..XXXXXXX
50
--- /dev/null
51
+++ b/disas/disas-mon.c
52
@@ -XXX,XX +XXX,XX @@
53
+/*
54
+ * Functions related to disassembly from the monitor
55
+ *
56
+ * SPDX-License-Identifier: GPL-2.0-or-later
57
+ */
58
+
59
+#include "qemu/osdep.h"
60
+#include "disas-internal.h"
61
+#include "disas/disas.h"
62
+#include "exec/memory.h"
63
+#include "hw/core/cpu.h"
64
+#include "monitor/monitor.h"
65
+
66
+static int
67
+physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
68
+ struct disassemble_info *info)
69
+{
70
+ CPUDebug *s = container_of(info, CPUDebug, info);
71
+ MemTxResult res;
72
+
73
+ res = address_space_read(s->cpu->as, memaddr, MEMTXATTRS_UNSPECIFIED,
74
+ myaddr, length);
75
+ return res == MEMTX_OK ? 0 : EIO;
76
+}
77
+
78
+/* Disassembler for the monitor. */
79
+void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
80
+ int nb_insn, bool is_physical)
81
+{
82
+ int count, i;
83
+ CPUDebug s;
84
+ g_autoptr(GString) ds = g_string_new("");
85
+
86
+ disas_initialize_debug_target(&s, cpu);
87
+ s.info.fprintf_func = disas_gstring_printf;
88
+ s.info.stream = (FILE *)ds; /* abuse this slot */
89
+
90
+ if (is_physical) {
91
+ s.info.read_memory_func = physical_read_memory;
92
+ }
93
+ s.info.buffer_vma = pc;
94
+
95
+ if (s.info.cap_arch >= 0 && cap_disas_monitor(&s.info, pc, nb_insn)) {
96
+ monitor_puts(mon, ds->str);
97
+ return;
98
+ }
99
+
100
+ if (!s.info.print_insn) {
101
+ monitor_printf(mon, "0x%08" PRIx64
102
+ ": Asm output not supported on this arch\n", pc);
103
+ return;
104
+ }
105
+
106
+ for (i = 0; i < nb_insn; i++) {
107
+ g_string_append_printf(ds, "0x%08" PRIx64 ": ", pc);
108
+ count = s.info.print_insn(pc, &s.info);
109
+ g_string_append_c(ds, '\n');
110
+ if (count < 0) {
111
+ break;
112
+ }
113
+ pc += count;
114
+ }
115
+
116
+ monitor_puts(mon, ds->str);
117
+}
118
diff --git a/disas/disas.c b/disas/disas.c
119
index XXXXXXX..XXXXXXX 100644
120
--- a/disas/disas.c
121
+++ b/disas/disas.c
122
@@ -XXX,XX +XXX,XX @@
123
/* General "disassemble this chunk" code. Used for debugging. */
124
#include "qemu/osdep.h"
125
-#include "disas/dis-asm.h"
126
+#include "disas/disas-internal.h"
127
#include "elf.h"
128
#include "qemu/qemu-print.h"
129
#include "disas/disas.h"
130
@@ -XXX,XX +XXX,XX @@
131
#include "hw/core/cpu.h"
132
#include "exec/memory.h"
133
134
-typedef struct CPUDebug {
135
- struct disassemble_info info;
136
- CPUState *cpu;
137
-} CPUDebug;
138
-
139
/* Filled in by elfload.c. Simplistic, but will do for now. */
140
struct syminfo *syminfos = NULL;
141
142
@@ -XXX,XX +XXX,XX @@ static void initialize_debug(CPUDebug *s)
143
s->info.symbol_at_address_func = symbol_at_address;
144
}
145
146
-static void initialize_debug_target(CPUDebug *s, CPUState *cpu)
147
+void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu)
148
{
149
initialize_debug(s);
150
151
@@ -XXX,XX +XXX,XX @@ void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size)
152
int count;
153
CPUDebug s;
154
155
- initialize_debug_target(&s, cpu);
156
+ disas_initialize_debug_target(&s, cpu);
157
s.info.fprintf_func = fprintf;
158
s.info.stream = out;
159
s.info.buffer_vma = code;
160
@@ -XXX,XX +XXX,XX @@ void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size)
161
}
162
}
163
164
-static int G_GNUC_PRINTF(2, 3)
165
-gstring_printf(FILE *stream, const char *fmt, ...)
166
+int disas_gstring_printf(FILE *stream, const char *fmt, ...)
167
{
168
/* We abuse the FILE parameter to pass a GString. */
169
GString *s = (GString *)stream;
170
@@ -XXX,XX +XXX,XX @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size)
171
CPUDebug s;
172
GString *ds = g_string_new(NULL);
173
174
- initialize_debug_target(&s, cpu);
175
- s.info.fprintf_func = gstring_printf;
176
+ disas_initialize_debug_target(&s, cpu);
177
+ s.info.fprintf_func = disas_gstring_printf;
178
s.info.stream = (FILE *)ds; /* abuse this slot */
179
s.info.buffer_vma = addr;
180
s.info.buffer_length = size;
181
@@ -XXX,XX +XXX,XX @@ const char *lookup_symbol(uint64_t orig_addr)
182
183
return symbol;
184
}
185
-
186
-#if !defined(CONFIG_USER_ONLY)
187
-
188
-#include "monitor/monitor.h"
189
-
190
-static int
191
-physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
192
- struct disassemble_info *info)
193
-{
194
- CPUDebug *s = container_of(info, CPUDebug, info);
195
- MemTxResult res;
196
-
197
- res = address_space_read(s->cpu->as, memaddr, MEMTXATTRS_UNSPECIFIED,
198
- myaddr, length);
199
- return res == MEMTX_OK ? 0 : EIO;
200
-}
201
-
202
-/* Disassembler for the monitor. */
203
-void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
204
- int nb_insn, bool is_physical)
205
-{
206
- int count, i;
207
- CPUDebug s;
208
- g_autoptr(GString) ds = g_string_new("");
209
-
210
- initialize_debug_target(&s, cpu);
211
- s.info.fprintf_func = gstring_printf;
212
- s.info.stream = (FILE *)ds; /* abuse this slot */
213
-
214
- if (is_physical) {
215
- s.info.read_memory_func = physical_read_memory;
216
- }
217
- s.info.buffer_vma = pc;
218
-
219
- if (s.info.cap_arch >= 0 && cap_disas_monitor(&s.info, pc, nb_insn)) {
220
- monitor_puts(mon, ds->str);
221
- return;
222
- }
223
-
224
- if (!s.info.print_insn) {
225
- monitor_printf(mon, "0x%08" PRIx64
226
- ": Asm output not supported on this arch\n", pc);
227
- return;
228
- }
229
-
230
- for (i = 0; i < nb_insn; i++) {
231
- g_string_append_printf(ds, "0x%08" PRIx64 ": ", pc);
232
- count = s.info.print_insn(pc, &s.info);
233
- g_string_append_c(ds, '\n');
234
- if (count < 0) {
235
- break;
236
- }
237
- pc += count;
238
- }
239
-
240
- monitor_puts(mon, ds->str);
241
-}
242
-#endif
243
diff --git a/disas/meson.build b/disas/meson.build
244
index XXXXXXX..XXXXXXX 100644
245
--- a/disas/meson.build
246
+++ b/disas/meson.build
247
@@ -XXX,XX +XXX,XX @@ common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c'))
248
common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c'))
249
common_ss.add(when: capstone, if_true: [files('capstone.c'), capstone])
250
251
+softmmu_ss.add(files('disas-mon.c'))
252
specific_ss.add(files('disas.c'), capstone)
253
--
254
2.34.1
New patch
From: Thomas Huth <thuth@redhat.com>

By using target_words_bigendian() instead of an ifdef,
we can build this code once.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20230508133745.109463-3-thuth@redhat.com>
[rth: Type change done in a separate patch]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 disas/disas.c | 10 +++++-----
 disas/meson.build | 3 ++-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/disas/disas.c b/disas/disas.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/disas/disas.c
18
+++ b/disas/disas.c
19
@@ -XXX,XX +XXX,XX @@ void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu)
20
s->cpu = cpu;
21
s->info.read_memory_func = target_read_memory;
22
s->info.print_address_func = print_address;
23
-#if TARGET_BIG_ENDIAN
24
- s->info.endian = BFD_ENDIAN_BIG;
25
-#else
26
- s->info.endian = BFD_ENDIAN_LITTLE;
27
-#endif
28
+ if (target_words_bigendian()) {
29
+ s->info.endian = BFD_ENDIAN_BIG;
30
+ } else {
31
+ s->info.endian = BFD_ENDIAN_LITTLE;
32
+ }
33
34
CPUClass *cc = CPU_GET_CLASS(cpu);
35
if (cc->disas_set_info) {
36
diff --git a/disas/meson.build b/disas/meson.build
37
index XXXXXXX..XXXXXXX 100644
38
--- a/disas/meson.build
39
+++ b/disas/meson.build
40
@@ -XXX,XX +XXX,XX @@ common_ss.add(when: 'CONFIG_SH4_DIS', if_true: files('sh4.c'))
41
common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c'))
42
common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c'))
43
common_ss.add(when: capstone, if_true: [files('capstone.c'), capstone])
44
+common_ss.add(files('disas.c'))
45
46
softmmu_ss.add(files('disas-mon.c'))
47
-specific_ss.add(files('disas.c'), capstone)
48
+specific_ss.add(capstone)
49
--
50
2.34.1
New patch
From: Jamie Iles <quic_jiles@quicinc.com>

Expose qemu_cpu_list_lock globally so that we can use
WITH_QEMU_LOCK_GUARD and QEMU_LOCK_GUARD to simplify a few code paths
now and in future.

Signed-off-by: Jamie Iles <quic_jiles@quicinc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230427020925.51003-2-quic_jiles@quicinc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/cpu-common.h | 1 +
 cpus-common.c | 2 +-
 linux-user/elfload.c | 13 +++++++------
 migration/dirtyrate.c | 26 +++++++++++++-------------
 trace/control-target.c | 9 ++++-----
 5 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/include/exec/cpu-common.h
23
+++ b/include/exec/cpu-common.h
24
@@ -XXX,XX +XXX,XX @@ extern intptr_t qemu_host_page_mask;
25
#define REAL_HOST_PAGE_ALIGN(addr) ROUND_UP((addr), qemu_real_host_page_size())
26
27
/* The CPU list lock nests outside page_(un)lock or mmap_(un)lock */
28
+extern QemuMutex qemu_cpu_list_lock;
29
void qemu_init_cpu_list(void);
30
void cpu_list_lock(void);
31
void cpu_list_unlock(void);
32
diff --git a/cpus-common.c b/cpus-common.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/cpus-common.c
35
+++ b/cpus-common.c
36
@@ -XXX,XX +XXX,XX @@
37
#include "qemu/lockable.h"
38
#include "trace/trace-root.h"
39
40
-static QemuMutex qemu_cpu_list_lock;
41
+QemuMutex qemu_cpu_list_lock;
42
static QemuCond exclusive_cond;
43
static QemuCond exclusive_resume;
44
static QemuCond qemu_work_cond;
45
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
46
index XXXXXXX..XXXXXXX 100644
47
--- a/linux-user/elfload.c
48
+++ b/linux-user/elfload.c
49
@@ -XXX,XX +XXX,XX @@
50
#include "qemu/guest-random.h"
51
#include "qemu/units.h"
52
#include "qemu/selfmap.h"
53
+#include "qemu/lockable.h"
54
#include "qapi/error.h"
55
#include "qemu/error-report.h"
56
#include "target_signal.h"
57
@@ -XXX,XX +XXX,XX @@ static int fill_note_info(struct elf_note_info *info,
58
info->notes_size += note_size(&info->notes[i]);
59
60
/* read and fill status of all threads */
61
- cpu_list_lock();
62
- CPU_FOREACH(cpu) {
63
- if (cpu == thread_cpu) {
64
- continue;
65
+ WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
66
+ CPU_FOREACH(cpu) {
67
+ if (cpu == thread_cpu) {
68
+ continue;
69
+ }
70
+ fill_thread_info(info, cpu->env_ptr);
71
}
72
- fill_thread_info(info, cpu->env_ptr);
73
}
74
- cpu_list_unlock();
75
76
return (0);
77
}
78
diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
79
index XXXXXXX..XXXXXXX 100644
80
--- a/migration/dirtyrate.c
81
+++ b/migration/dirtyrate.c
82
@@ -XXX,XX +XXX,XX @@ int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
83
retry:
84
init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
85
86
- cpu_list_lock();
87
- gen_id = cpu_list_generation_id_get();
88
- records = vcpu_dirty_stat_alloc(stat);
89
- vcpu_dirty_stat_collect(stat, records, true);
90
- cpu_list_unlock();
91
+ WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
92
+ gen_id = cpu_list_generation_id_get();
93
+ records = vcpu_dirty_stat_alloc(stat);
94
+ vcpu_dirty_stat_collect(stat, records, true);
95
+ }
96
97
duration = dirty_stat_wait(calc_time_ms, init_time_ms);
98
99
global_dirty_log_sync(flag, one_shot);
100
101
- cpu_list_lock();
102
- if (gen_id != cpu_list_generation_id_get()) {
103
- g_free(records);
104
- g_free(stat->rates);
105
- cpu_list_unlock();
106
- goto retry;
107
+ WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
108
+ if (gen_id != cpu_list_generation_id_get()) {
109
+ g_free(records);
110
+ g_free(stat->rates);
111
+ cpu_list_unlock();
112
+ goto retry;
113
+ }
114
+ vcpu_dirty_stat_collect(stat, records, false);
115
}
116
- vcpu_dirty_stat_collect(stat, records, false);
117
- cpu_list_unlock();
118
119
for (i = 0; i < stat->nvcpu; i++) {
120
dirtyrate = do_calculate_dirtyrate(records[i], duration);
121
diff --git a/trace/control-target.c b/trace/control-target.c
122
index XXXXXXX..XXXXXXX 100644
123
--- a/trace/control-target.c
124
+++ b/trace/control-target.c
125
@@ -XXX,XX +XXX,XX @@
126
*/
127
128
#include "qemu/osdep.h"
129
+#include "qemu/lockable.h"
130
#include "cpu.h"
131
#include "trace/trace-root.h"
132
#include "trace/control.h"
133
@@ -XXX,XX +XXX,XX @@ static bool adding_first_cpu1(void)
134
135
static bool adding_first_cpu(void)
136
{
137
- bool res;
138
- cpu_list_lock();
139
- res = adding_first_cpu1();
140
- cpu_list_unlock();
141
- return res;
142
+ QEMU_LOCK_GUARD(&qemu_cpu_list_lock);
143
+
144
+ return adding_first_cpu1();
145
}
146
147
void trace_init_vcpu(CPUState *vcpu)
148
--
149
2.34.1
150
151
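For readers unfamiliar with QEMU's lock guards, the pattern this patch switches to looks roughly like the sketch below. This is illustrative only, modelled on the elfload.c and trace/control-target.c hunks above; the guard macros come from include/qemu/lockable.h, and count_cpus_locked() is a hypothetical helper, not part of the patch.

  /* Illustrative sketch only -- mirrors the hunks above, not new API. */
  #include "qemu/osdep.h"
  #include "qemu/lockable.h"
  #include "exec/cpu-common.h"   /* qemu_cpu_list_lock is exported here by this patch */
  #include "hw/core/cpu.h"

  static int count_cpus_locked(void)
  {
      CPUState *cpu;
      int n = 0;

      /* The lock is taken here and released automatically on every exit path,
       * replacing explicit cpu_list_lock()/cpu_list_unlock() pairs. */
      WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
          CPU_FOREACH(cpu) {
              n++;
          }
      }
      return n;
  }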
New patch
From: Jamie Iles <quic_jiles@quicinc.com>

The round-robin scheduler will iterate over the CPU list with an
assigned budget until the next timer expiry and may exit early because
of a TB exit. This is fine under normal operation but with icount
enabled and SMP it is possible for a CPU to be starved of run time and
the system live-locks.

For example, booting a riscv64 platform with '-icount
shift=0,align=off,sleep=on -smp 2' we observe a livelock once the kernel
has timers enabled and starts performing TLB shootdowns. In this case
we have CPU 0 in M-mode with interrupts disabled sending an IPI to CPU
1. As we enter the TCG loop, we assign the icount budget up to the next
timer interrupt to CPU 0 and begin executing; the guest sits in a busy
loop, exhausting all of the budget before we try to execute CPU 1, which
is the target of the IPI but is left with no budget with which to
execute, and the process repeats.

We try here to add some fairness by splitting the budget across all of
the CPUs on the thread fairly before entering each one. The CPU count
is cached on the CPU list generation ID to avoid iterating the list on
each loop iteration. With this change it is possible to boot an SMP rv64
guest with icount enabled and no hangs.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Jamie Iles <quic_jiles@quicinc.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230427020925.51003-3-quic_jiles@quicinc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-accel-ops-icount.h | 3 ++-
 accel/tcg/tcg-accel-ops-icount.c | 21 ++++++++++++++----
 accel/tcg/tcg-accel-ops-rr.c | 37 +++++++++++++++++++++++++++++++-
 replay/replay.c | 3 +--
 4 files changed, 56 insertions(+), 8 deletions(-)

diff --git a/accel/tcg/tcg-accel-ops-icount.h b/accel/tcg/tcg-accel-ops-icount.h
39
index XXXXXXX..XXXXXXX 100644
40
--- a/accel/tcg/tcg-accel-ops-icount.h
41
+++ b/accel/tcg/tcg-accel-ops-icount.h
42
@@ -XXX,XX +XXX,XX @@
43
#define TCG_ACCEL_OPS_ICOUNT_H
44
45
void icount_handle_deadline(void);
46
-void icount_prepare_for_run(CPUState *cpu);
47
+void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget);
48
+int64_t icount_percpu_budget(int cpu_count);
49
void icount_process_data(CPUState *cpu);
50
51
void icount_handle_interrupt(CPUState *cpu, int mask);
52
diff --git a/accel/tcg/tcg-accel-ops-icount.c b/accel/tcg/tcg-accel-ops-icount.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/accel/tcg/tcg-accel-ops-icount.c
55
+++ b/accel/tcg/tcg-accel-ops-icount.c
56
@@ -XXX,XX +XXX,XX @@ void icount_handle_deadline(void)
57
}
58
}
59
60
-void icount_prepare_for_run(CPUState *cpu)
61
+/* Distribute the budget evenly across all CPUs */
62
+int64_t icount_percpu_budget(int cpu_count)
63
+{
64
+ int64_t limit = icount_get_limit();
65
+ int64_t timeslice = limit / cpu_count;
66
+
67
+ if (timeslice == 0) {
68
+ timeslice = limit;
69
+ }
70
+
71
+ return timeslice;
72
+}
73
+
74
+void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget)
75
{
76
int insns_left;
77
78
@@ -XXX,XX +XXX,XX @@ void icount_prepare_for_run(CPUState *cpu)
79
g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
80
g_assert(cpu->icount_extra == 0);
81
82
- cpu->icount_budget = icount_get_limit();
83
+ replay_mutex_lock();
84
+
85
+ cpu->icount_budget = MIN(icount_get_limit(), cpu_budget);
86
insns_left = MIN(0xffff, cpu->icount_budget);
87
cpu_neg(cpu)->icount_decr.u16.low = insns_left;
88
cpu->icount_extra = cpu->icount_budget - insns_left;
89
90
- replay_mutex_lock();
91
-
92
if (cpu->icount_budget == 0) {
93
/*
94
* We're called without the iothread lock, so must take it while
95
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
96
index XXXXXXX..XXXXXXX 100644
97
--- a/accel/tcg/tcg-accel-ops-rr.c
98
+++ b/accel/tcg/tcg-accel-ops-rr.c
99
@@ -XXX,XX +XXX,XX @@
100
*/
101
102
#include "qemu/osdep.h"
103
+#include "qemu/lockable.h"
104
#include "sysemu/tcg.h"
105
#include "sysemu/replay.h"
106
#include "sysemu/cpu-timers.h"
107
@@ -XXX,XX +XXX,XX @@ static void rr_force_rcu(Notifier *notify, void *data)
108
rr_kick_next_cpu();
109
}
110
111
+/*
112
+ * Calculate the number of CPUs that we will process in a single iteration of
113
+ * the main CPU thread loop so that we can fairly distribute the instruction
114
+ * count across CPUs.
115
+ *
116
+ * The CPU count is cached based on the CPU list generation ID to avoid
117
+ * iterating the list every time.
118
+ */
119
+static int rr_cpu_count(void)
120
+{
121
+ static unsigned int last_gen_id = ~0;
122
+ static int cpu_count;
123
+ CPUState *cpu;
124
+
125
+ QEMU_LOCK_GUARD(&qemu_cpu_list_lock);
126
+
127
+ if (cpu_list_generation_id_get() != last_gen_id) {
128
+ cpu_count = 0;
129
+ CPU_FOREACH(cpu) {
130
+ ++cpu_count;
131
+ }
132
+ last_gen_id = cpu_list_generation_id_get();
133
+ }
134
+
135
+ return cpu_count;
136
+}
137
+
138
/*
139
* In the single-threaded case each vCPU is simulated in turn. If
140
* there is more than a single vCPU we create a simple timer to kick
141
@@ -XXX,XX +XXX,XX @@ static void *rr_cpu_thread_fn(void *arg)
142
cpu->exit_request = 1;
143
144
while (1) {
145
+ /* Only used for icount_enabled() */
146
+ int64_t cpu_budget = 0;
147
+
148
qemu_mutex_unlock_iothread();
149
replay_mutex_lock();
150
qemu_mutex_lock_iothread();
151
152
if (icount_enabled()) {
153
+ int cpu_count = rr_cpu_count();
154
+
155
/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
156
icount_account_warp_timer();
157
/*
158
@@ -XXX,XX +XXX,XX @@ static void *rr_cpu_thread_fn(void *arg)
159
* waking up the I/O thread and waiting for completion.
160
*/
161
icount_handle_deadline();
162
+
163
+ cpu_budget = icount_percpu_budget(cpu_count);
164
}
165
166
replay_mutex_unlock();
167
@@ -XXX,XX +XXX,XX @@ static void *rr_cpu_thread_fn(void *arg)
168
169
qemu_mutex_unlock_iothread();
170
if (icount_enabled()) {
171
- icount_prepare_for_run(cpu);
172
+ icount_prepare_for_run(cpu, cpu_budget);
173
}
174
r = tcg_cpus_exec(cpu);
175
if (icount_enabled()) {
176
diff --git a/replay/replay.c b/replay/replay.c
177
index XXXXXXX..XXXXXXX 100644
178
--- a/replay/replay.c
179
+++ b/replay/replay.c
180
@@ -XXX,XX +XXX,XX @@ uint64_t replay_get_current_icount(void)
181
int replay_get_instructions(void)
182
{
183
int res = 0;
184
- replay_mutex_lock();
185
+ g_assert(replay_mutex_locked());
186
if (replay_next_event_is(EVENT_INSTRUCTION)) {
187
res = replay_state.instruction_count;
188
if (replay_break_icount != -1LL) {
189
@@ -XXX,XX +XXX,XX @@ int replay_get_instructions(void)
190
}
191
}
192
}
193
- replay_mutex_unlock();
194
return res;
195
}
196
197
--
198
2.34.1
199
200
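Stripped of the QEMU plumbing, the fairness change amounts to the even split below; this is a sketch of icount_percpu_budget() from the diff above, with a simplified signature (the real function calls icount_get_limit() itself), not a drop-in replacement.

  /* Sketch of the per-CPU budget split implemented by icount_percpu_budget(). */
  static int64_t percpu_budget(int64_t limit, int cpu_count)
  {
      int64_t timeslice = limit / cpu_count;

      /* If the per-CPU share rounds down to zero, fall back to the full
       * budget so that a CPU can still make forward progress. */
      return timeslice == 0 ? limit : timeslice;
  }

Each vCPU then runs with MIN(icount_get_limit(), cpu_budget), as done in icount_prepare_for_run() in the patch, so no single CPU can consume the whole timeslice before its siblings get a turn.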
New patch
Merge tcg_out_tlb_load, add_qemu_ldst_label,
tcg_out_test_alignment, and some code that lived in both
tcg_out_qemu_ld and tcg_out_qemu_st into one function
that returns HostAddress and TCGLabelQemuLdst structures.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.c.inc | 346 ++++++++++++++++----------------------
 1 file changed, 145 insertions(+), 201 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/i386/tcg-target.c.inc
15
+++ b/tcg/i386/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
17
[MO_BEUQ] = helper_be_stq_mmu,
18
};
19
20
-/* Perform the TLB load and compare.
21
-
22
- Inputs:
23
- ADDRLO and ADDRHI contain the low and high part of the address.
24
-
25
- MEM_INDEX and S_BITS are the memory context and log2 size of the load.
26
-
27
- WHICH is the offset into the CPUTLBEntry structure of the slot to read.
28
- This should be offsetof addr_read or addr_write.
29
-
30
- Outputs:
31
- LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
32
- positions of the displacements of forward jumps to the TLB miss case.
33
-
34
- Second argument register is loaded with the low part of the address.
35
- In the TLB hit case, it has been adjusted as indicated by the TLB
36
- and so is a host address. In the TLB miss case, it continues to
37
- hold a guest address.
38
-
39
- First argument register is clobbered. */
40
-
41
-static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
42
- int mem_index, MemOp opc,
43
- tcg_insn_unit **label_ptr, int which)
44
-{
45
- TCGType ttype = TCG_TYPE_I32;
46
- TCGType tlbtype = TCG_TYPE_I32;
47
- int trexw = 0, hrexw = 0, tlbrexw = 0;
48
- unsigned a_bits = get_alignment_bits(opc);
49
- unsigned s_bits = opc & MO_SIZE;
50
- unsigned a_mask = (1 << a_bits) - 1;
51
- unsigned s_mask = (1 << s_bits) - 1;
52
- target_ulong tlb_mask;
53
-
54
- if (TCG_TARGET_REG_BITS == 64) {
55
- if (TARGET_LONG_BITS == 64) {
56
- ttype = TCG_TYPE_I64;
57
- trexw = P_REXW;
58
- }
59
- if (TCG_TYPE_PTR == TCG_TYPE_I64) {
60
- hrexw = P_REXW;
61
- if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
62
- tlbtype = TCG_TYPE_I64;
63
- tlbrexw = P_REXW;
64
- }
65
- }
66
- }
67
-
68
- tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
69
- tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
70
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
71
-
72
- tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
73
- TLB_MASK_TABLE_OFS(mem_index) +
74
- offsetof(CPUTLBDescFast, mask));
75
-
76
- tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0,
77
- TLB_MASK_TABLE_OFS(mem_index) +
78
- offsetof(CPUTLBDescFast, table));
79
-
80
- /* If the required alignment is at least as large as the access, simply
81
- copy the address and mask. For lesser alignments, check that we don't
82
- cross pages for the complete access. */
83
- if (a_bits >= s_bits) {
84
- tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
85
- } else {
86
- tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
87
- addrlo, s_mask - a_mask);
88
- }
89
- tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
90
- tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
91
-
92
- /* cmp 0(TCG_REG_L0), TCG_REG_L1 */
93
- tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
94
- TCG_REG_L1, TCG_REG_L0, which);
95
-
96
- /* Prepare for both the fast path add of the tlb addend, and the slow
97
- path function argument setup. */
98
- tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
99
-
100
- /* jne slow_path */
101
- tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
102
- label_ptr[0] = s->code_ptr;
103
- s->code_ptr += 4;
104
-
105
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
106
- /* cmp 4(TCG_REG_L0), addrhi */
107
- tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, which + 4);
108
-
109
- /* jne slow_path */
110
- tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
111
- label_ptr[1] = s->code_ptr;
112
- s->code_ptr += 4;
113
- }
114
-
115
- /* TLB Hit. */
116
-
117
- /* add addend(TCG_REG_L0), TCG_REG_L1 */
118
- tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L1, TCG_REG_L0,
119
- offsetof(CPUTLBEntry, addend));
120
-}
121
-
122
-/*
123
- * Record the context of a call to the out of line helper code for the slow path
124
- * for a load or store, so that we can later generate the correct helper code
125
- */
126
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
127
- TCGType type, MemOpIdx oi,
128
- TCGReg datalo, TCGReg datahi,
129
- TCGReg addrlo, TCGReg addrhi,
130
- tcg_insn_unit *raddr,
131
- tcg_insn_unit **label_ptr)
132
-{
133
- TCGLabelQemuLdst *label = new_ldst_label(s);
134
-
135
- label->is_ld = is_ld;
136
- label->oi = oi;
137
- label->type = type;
138
- label->datalo_reg = datalo;
139
- label->datahi_reg = datahi;
140
- label->addrlo_reg = addrlo;
141
- label->addrhi_reg = addrhi;
142
- label->raddr = tcg_splitwx_to_rx(raddr);
143
- label->label_ptr[0] = label_ptr[0];
144
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
145
- label->label_ptr[1] = label_ptr[1];
146
- }
147
-}
148
-
149
/*
150
* Generate code for the slow path for a load at the end of block
151
*/
152
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
153
return true;
154
}
155
#else
156
-
157
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
158
- TCGReg addrhi, unsigned a_bits)
159
-{
160
- unsigned a_mask = (1 << a_bits) - 1;
161
- TCGLabelQemuLdst *label;
162
-
163
- tcg_out_testi(s, addrlo, a_mask);
164
- /* jne slow_path */
165
- tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
166
-
167
- label = new_ldst_label(s);
168
- label->is_ld = is_ld;
169
- label->addrlo_reg = addrlo;
170
- label->addrhi_reg = addrhi;
171
- label->raddr = tcg_splitwx_to_rx(s->code_ptr + 4);
172
- label->label_ptr[0] = s->code_ptr;
173
-
174
- s->code_ptr += 4;
175
-}
176
-
177
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
178
{
179
/* resolve label address */
180
@@ -XXX,XX +XXX,XX @@ static inline int setup_guest_base_seg(void)
181
#endif /* setup_guest_base_seg */
182
#endif /* SOFTMMU */
183
184
+/*
185
+ * For softmmu, perform the TLB load and compare.
186
+ * For useronly, perform any required alignment tests.
187
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
188
+ * is required and fill in @h with the host address for the fast path.
189
+ */
190
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
191
+ TCGReg addrlo, TCGReg addrhi,
192
+ MemOpIdx oi, bool is_ld)
193
+{
194
+ TCGLabelQemuLdst *ldst = NULL;
195
+ MemOp opc = get_memop(oi);
196
+ unsigned a_bits = get_alignment_bits(opc);
197
+ unsigned a_mask = (1 << a_bits) - 1;
198
+
199
+#ifdef CONFIG_SOFTMMU
200
+ int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read)
201
+ : offsetof(CPUTLBEntry, addr_write);
202
+ TCGType ttype = TCG_TYPE_I32;
203
+ TCGType tlbtype = TCG_TYPE_I32;
204
+ int trexw = 0, hrexw = 0, tlbrexw = 0;
205
+ unsigned mem_index = get_mmuidx(oi);
206
+ unsigned s_bits = opc & MO_SIZE;
207
+ unsigned s_mask = (1 << s_bits) - 1;
208
+ target_ulong tlb_mask;
209
+
210
+ ldst = new_ldst_label(s);
211
+ ldst->is_ld = is_ld;
212
+ ldst->oi = oi;
213
+ ldst->addrlo_reg = addrlo;
214
+ ldst->addrhi_reg = addrhi;
215
+
216
+ if (TCG_TARGET_REG_BITS == 64) {
217
+ if (TARGET_LONG_BITS == 64) {
218
+ ttype = TCG_TYPE_I64;
219
+ trexw = P_REXW;
220
+ }
221
+ if (TCG_TYPE_PTR == TCG_TYPE_I64) {
222
+ hrexw = P_REXW;
223
+ if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
224
+ tlbtype = TCG_TYPE_I64;
225
+ tlbrexw = P_REXW;
226
+ }
227
+ }
228
+ }
229
+
230
+ tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
231
+ tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
232
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
233
+
234
+ tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
235
+ TLB_MASK_TABLE_OFS(mem_index) +
236
+ offsetof(CPUTLBDescFast, mask));
237
+
238
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0,
239
+ TLB_MASK_TABLE_OFS(mem_index) +
240
+ offsetof(CPUTLBDescFast, table));
241
+
242
+ /*
243
+ * If the required alignment is at least as large as the access, simply
244
+ * copy the address and mask. For lesser alignments, check that we don't
245
+ * cross pages for the complete access.
246
+ */
247
+ if (a_bits >= s_bits) {
248
+ tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
249
+ } else {
250
+ tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
251
+ addrlo, s_mask - a_mask);
252
+ }
253
+ tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
254
+ tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
255
+
256
+ /* cmp 0(TCG_REG_L0), TCG_REG_L1 */
257
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
258
+ TCG_REG_L1, TCG_REG_L0, cmp_ofs);
259
+
260
+ /*
261
+ * Prepare for both the fast path add of the tlb addend, and the slow
262
+ * path function argument setup.
263
+ */
264
+ *h = (HostAddress) {
265
+ .base = TCG_REG_L1,
266
+ .index = -1
267
+ };
268
+ tcg_out_mov(s, ttype, h->base, addrlo);
269
+
270
+ /* jne slow_path */
271
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
272
+ ldst->label_ptr[0] = s->code_ptr;
273
+ s->code_ptr += 4;
274
+
275
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
276
+ /* cmp 4(TCG_REG_L0), addrhi */
277
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, cmp_ofs + 4);
278
+
279
+ /* jne slow_path */
280
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
281
+ ldst->label_ptr[1] = s->code_ptr;
282
+ s->code_ptr += 4;
283
+ }
284
+
285
+ /* TLB Hit. */
286
+
287
+ /* add addend(TCG_REG_L0), TCG_REG_L1 */
288
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, h->base, TCG_REG_L0,
289
+ offsetof(CPUTLBEntry, addend));
290
+#else
291
+ if (a_bits) {
292
+ ldst = new_ldst_label(s);
293
+
294
+ ldst->is_ld = is_ld;
295
+ ldst->oi = oi;
296
+ ldst->addrlo_reg = addrlo;
297
+ ldst->addrhi_reg = addrhi;
298
+
299
+ tcg_out_testi(s, addrlo, a_mask);
300
+ /* jne slow_path */
301
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
302
+ ldst->label_ptr[0] = s->code_ptr;
303
+ s->code_ptr += 4;
304
+ }
305
+
306
+ *h = x86_guest_base;
307
+ h->base = addrlo;
308
+#endif
309
+
310
+ return ldst;
311
+}
312
+
313
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
314
HostAddress h, TCGType type, MemOp memop)
315
{
316
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
317
TCGReg addrlo, TCGReg addrhi,
318
MemOpIdx oi, TCGType data_type)
319
{
320
- MemOp opc = get_memop(oi);
321
+ TCGLabelQemuLdst *ldst;
322
HostAddress h;
323
324
-#if defined(CONFIG_SOFTMMU)
325
- tcg_insn_unit *label_ptr[2];
326
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
327
+ tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, get_memop(oi));
328
329
- tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc,
330
- label_ptr, offsetof(CPUTLBEntry, addr_read));
331
-
332
- /* TLB Hit. */
333
- h.base = TCG_REG_L1;
334
- h.index = -1;
335
- h.ofs = 0;
336
- h.seg = 0;
337
- tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, opc);
338
-
339
- /* Record the current context of a load into ldst label */
340
- add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi,
341
- addrlo, addrhi, s->code_ptr, label_ptr);
342
-#else
343
- unsigned a_bits = get_alignment_bits(opc);
344
- if (a_bits) {
345
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
346
+ if (ldst) {
347
+ ldst->type = data_type;
348
+ ldst->datalo_reg = datalo;
349
+ ldst->datahi_reg = datahi;
350
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
351
}
352
-
353
- h = x86_guest_base;
354
- h.base = addrlo;
355
- tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, opc);
356
-#endif
357
}
358
359
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
360
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
361
TCGReg addrlo, TCGReg addrhi,
362
MemOpIdx oi, TCGType data_type)
363
{
364
- MemOp opc = get_memop(oi);
365
+ TCGLabelQemuLdst *ldst;
366
HostAddress h;
367
368
-#if defined(CONFIG_SOFTMMU)
369
- tcg_insn_unit *label_ptr[2];
370
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
371
+ tcg_out_qemu_st_direct(s, datalo, datahi, h, get_memop(oi));
372
373
- tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc,
374
- label_ptr, offsetof(CPUTLBEntry, addr_write));
375
-
376
- /* TLB Hit. */
377
- h.base = TCG_REG_L1;
378
- h.index = -1;
379
- h.ofs = 0;
380
- h.seg = 0;
381
- tcg_out_qemu_st_direct(s, datalo, datahi, h, opc);
382
-
383
- /* Record the current context of a store into ldst label */
384
- add_qemu_ldst_label(s, false, data_type, oi, datalo, datahi,
385
- addrlo, addrhi, s->code_ptr, label_ptr);
386
-#else
387
- unsigned a_bits = get_alignment_bits(opc);
388
- if (a_bits) {
389
- tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
390
+ if (ldst) {
391
+ ldst->type = data_type;
392
+ ldst->datalo_reg = datalo;
393
+ ldst->datahi_reg = datahi;
394
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
395
}
396
-
397
- h = x86_guest_base;
398
- h.base = addrlo;
399
-
400
- tcg_out_qemu_st_direct(s, datalo, datahi, h, opc);
401
-#endif
402
}
403
404
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
405
--
406
2.34.1
407
408
New patch
Since tcg_out_{ld,st}_helper_args, the slow path no longer requires
the address argument to be set up by the tlb load sequence. Use a
plain load for the addend and indexed addressing with the original
input address register.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.c.inc | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/i386/tcg-target.c.inc
15
+++ b/tcg/i386/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
17
tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
18
} else {
19
tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
20
- /* The second argument is already loaded with addrlo. */
21
+ tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
22
+ l->addrlo_reg);
23
tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
24
tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
25
(uintptr_t)l->raddr);
26
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
27
tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
28
} else {
29
tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
30
- /* The second argument is already loaded with addrlo. */
31
+ tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
32
+ l->addrlo_reg);
33
tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
34
tcg_target_call_iarg_regs[2], l->datalo_reg);
35
tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);
36
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
37
tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
38
TCG_REG_L1, TCG_REG_L0, cmp_ofs);
39
40
- /*
41
- * Prepare for both the fast path add of the tlb addend, and the slow
42
- * path function argument setup.
43
- */
44
- *h = (HostAddress) {
45
- .base = TCG_REG_L1,
46
- .index = -1
47
- };
48
- tcg_out_mov(s, ttype, h->base, addrlo);
49
-
50
/* jne slow_path */
51
tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
52
ldst->label_ptr[0] = s->code_ptr;
53
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
54
}
55
56
/* TLB Hit. */
57
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0,
58
+ offsetof(CPUTLBEntry, addend));
59
60
- /* add addend(TCG_REG_L0), TCG_REG_L1 */
61
- tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, h->base, TCG_REG_L0,
62
- offsetof(CPUTLBEntry, addend));
63
+ *h = (HostAddress) {
64
+ .base = addrlo,
65
+ .index = TCG_REG_L0,
66
+ };
67
#else
68
if (a_bits) {
69
ldst = new_ldst_label(s);
70
--
71
2.34.1
72
73
New patch
1
Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment,
2
and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st
3
into one function that returns HostAddress and TCGLabelQemuLdst structures.
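
The resulting caller shape, sketched as self-contained C with stub types and
printf standing in for instruction emission (only the control flow mirrors the
patch; none of this is the real TCG code generator):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    typedef struct { int base, index; } HostAddress;
    typedef struct { int type, datalo_reg; void *raddr; } LdstLabel;

    /*
     * Return a label only when a slow path was emitted (softmmu TLB miss,
     * or an alignment test for user-only); always fill *h for the fast path.
     */
    static LdstLabel *prepare_host_addr_sketch(HostAddress *h, int addr_reg,
                                               bool need_slow_path,
                                               LdstLabel *storage)
    {
        h->base = 1;            /* stand-in for TCG_REG_X1 / guest_base */
        h->index = addr_reg;
        return need_slow_path ? storage : NULL;
    }

    static void emit_ld_fast_path(HostAddress h, int data_reg)
    {
        printf("ldr x%d, [x%d, x%d]\n", data_reg, h.base, h.index);
    }

    static void emit_qemu_ld(int data_reg, int addr_reg, bool need_slow_path)
    {
        LdstLabel storage, *ldst;
        HostAddress h;

        ldst = prepare_host_addr_sketch(&h, addr_reg, need_slow_path, &storage);
        emit_ld_fast_path(h, data_reg);

        if (ldst) {             /* complete the slow-path label, if any */
            ldst->type = 0;
            ldst->datalo_reg = data_reg;
            ldst->raddr = NULL; /* would record the return address */
        }
    }

    int main(void)
    {
        emit_qemu_ld(0, 2, true);   /* softmmu-style: label filled in */
        emit_qemu_ld(0, 2, false);  /* no alignment check: no label */
        return 0;
    }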
1
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/aarch64/tcg-target.c.inc | 313 +++++++++++++++--------------------
9
1 file changed, 133 insertions(+), 180 deletions(-)
10
11
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/aarch64/tcg-target.c.inc
14
+++ b/tcg/aarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
16
tcg_out_goto(s, lb->raddr);
17
return true;
18
}
19
-
20
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
21
- TCGType ext, TCGReg data_reg, TCGReg addr_reg,
22
- tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
23
-{
24
- TCGLabelQemuLdst *label = new_ldst_label(s);
25
-
26
- label->is_ld = is_ld;
27
- label->oi = oi;
28
- label->type = ext;
29
- label->datalo_reg = data_reg;
30
- label->addrlo_reg = addr_reg;
31
- label->raddr = tcg_splitwx_to_rx(raddr);
32
- label->label_ptr[0] = label_ptr;
33
-}
34
-
35
-/* We expect to use a 7-bit scaled negative offset from ENV. */
36
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
37
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
38
-
39
-/* These offsets are built into the LDP below. */
40
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
41
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
42
-
43
-/* Load and compare a TLB entry, emitting the conditional jump to the
44
- slow path for the failure case, which will be patched later when finalizing
45
- the slow path. Generated code returns the host addend in X1,
46
- clobbers X0,X2,X3,TMP. */
47
-static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
48
- tcg_insn_unit **label_ptr, int mem_index,
49
- bool is_read)
50
-{
51
- unsigned a_bits = get_alignment_bits(opc);
52
- unsigned s_bits = opc & MO_SIZE;
53
- unsigned a_mask = (1u << a_bits) - 1;
54
- unsigned s_mask = (1u << s_bits) - 1;
55
- TCGReg x3;
56
- TCGType mask_type;
57
- uint64_t compare_mask;
58
-
59
- mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
60
- ? TCG_TYPE_I64 : TCG_TYPE_I32);
61
-
62
- /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
63
- tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
64
- TLB_MASK_TABLE_OFS(mem_index), 1, 0);
65
-
66
- /* Extract the TLB index from the address into X0. */
67
- tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
68
- TCG_REG_X0, TCG_REG_X0, addr_reg,
69
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
70
-
71
- /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
72
- tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
73
-
74
- /* Load the tlb comparator into X0, and the fast path addend into X1. */
75
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
76
- ? offsetof(CPUTLBEntry, addr_read)
77
- : offsetof(CPUTLBEntry, addr_write));
78
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
79
- offsetof(CPUTLBEntry, addend));
80
-
81
- /* For aligned accesses, we check the first byte and include the alignment
82
- bits within the address. For unaligned access, we check that we don't
83
- cross pages using the address of the last byte of the access. */
84
- if (a_bits >= s_bits) {
85
- x3 = addr_reg;
86
- } else {
87
- tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
88
- TCG_REG_X3, addr_reg, s_mask - a_mask);
89
- x3 = TCG_REG_X3;
90
- }
91
- compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
92
-
93
- /* Store the page mask part of the address into X3. */
94
- tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
95
- TCG_REG_X3, x3, compare_mask);
96
-
97
- /* Perform the address comparison. */
98
- tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
99
-
100
- /* If not equal, we jump to the slow path. */
101
- *label_ptr = s->code_ptr;
102
- tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
103
-}
104
-
105
#else
106
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
107
- unsigned a_bits)
108
-{
109
- unsigned a_mask = (1 << a_bits) - 1;
110
- TCGLabelQemuLdst *label = new_ldst_label(s);
111
-
112
- label->is_ld = is_ld;
113
- label->addrlo_reg = addr_reg;
114
-
115
- /* tst addr, #mask */
116
- tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
117
-
118
- label->label_ptr[0] = s->code_ptr;
119
-
120
- /* b.ne slow_path */
121
- tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
122
-
123
- label->raddr = tcg_splitwx_to_rx(s->code_ptr);
124
-}
125
-
126
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
127
{
128
if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
129
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
130
}
131
#endif /* CONFIG_SOFTMMU */
132
133
+/*
134
+ * For softmmu, perform the TLB load and compare.
135
+ * For useronly, perform any required alignment tests.
136
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
137
+ * is required and fill in @h with the host address for the fast path.
138
+ */
139
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
140
+ TCGReg addr_reg, MemOpIdx oi,
141
+ bool is_ld)
142
+{
143
+ TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
144
+ TCGLabelQemuLdst *ldst = NULL;
145
+ MemOp opc = get_memop(oi);
146
+ unsigned a_bits = get_alignment_bits(opc);
147
+ unsigned a_mask = (1u << a_bits) - 1;
148
+
149
+#ifdef CONFIG_SOFTMMU
150
+ unsigned s_bits = opc & MO_SIZE;
151
+ unsigned s_mask = (1u << s_bits) - 1;
152
+ unsigned mem_index = get_mmuidx(oi);
153
+ TCGReg x3;
154
+ TCGType mask_type;
155
+ uint64_t compare_mask;
156
+
157
+ ldst = new_ldst_label(s);
158
+ ldst->is_ld = is_ld;
159
+ ldst->oi = oi;
160
+ ldst->addrlo_reg = addr_reg;
161
+
162
+ mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
163
+ ? TCG_TYPE_I64 : TCG_TYPE_I32);
164
+
165
+ /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
166
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
167
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
168
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
169
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
170
+ tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
171
+ TLB_MASK_TABLE_OFS(mem_index), 1, 0);
172
+
173
+ /* Extract the TLB index from the address into X0. */
174
+ tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
175
+ TCG_REG_X0, TCG_REG_X0, addr_reg,
176
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
177
+
178
+ /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
179
+ tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
180
+
181
+ /* Load the tlb comparator into X0, and the fast path addend into X1. */
182
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1,
183
+ is_ld ? offsetof(CPUTLBEntry, addr_read)
184
+ : offsetof(CPUTLBEntry, addr_write));
185
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
186
+ offsetof(CPUTLBEntry, addend));
187
+
188
+ /*
189
+ * For aligned accesses, we check the first byte and include the alignment
190
+ * bits within the address. For unaligned access, we check that we don't
191
+ * cross pages using the address of the last byte of the access.
192
+ */
193
+ if (a_bits >= s_bits) {
194
+ x3 = addr_reg;
195
+ } else {
196
+ tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
197
+ TCG_REG_X3, addr_reg, s_mask - a_mask);
198
+ x3 = TCG_REG_X3;
199
+ }
200
+ compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
201
+
202
+ /* Store the page mask part of the address into X3. */
203
+ tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
204
+ TCG_REG_X3, x3, compare_mask);
205
+
206
+ /* Perform the address comparison. */
207
+ tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
208
+
209
+ /* If not equal, we jump to the slow path. */
210
+ ldst->label_ptr[0] = s->code_ptr;
211
+ tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
212
+
213
+ *h = (HostAddress){
214
+ .base = TCG_REG_X1,
215
+ .index = addr_reg,
216
+ .index_ext = addr_type
217
+ };
218
+#else
219
+ if (a_mask) {
220
+ ldst = new_ldst_label(s);
221
+
222
+ ldst->is_ld = is_ld;
223
+ ldst->oi = oi;
224
+ ldst->addrlo_reg = addr_reg;
225
+
226
+ /* tst addr, #mask */
227
+ tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
228
+
229
+ /* b.ne slow_path */
230
+ ldst->label_ptr[0] = s->code_ptr;
231
+ tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
232
+ }
233
+
234
+ if (USE_GUEST_BASE) {
235
+ *h = (HostAddress){
236
+ .base = TCG_REG_GUEST_BASE,
237
+ .index = addr_reg,
238
+ .index_ext = addr_type
239
+ };
240
+ } else {
241
+ *h = (HostAddress){
242
+ .base = addr_reg,
243
+ .index = TCG_REG_XZR,
244
+ .index_ext = TCG_TYPE_I64
245
+ };
246
+ }
247
+#endif
248
+
249
+ return ldst;
250
+}
251
+
252
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
253
TCGReg data_r, HostAddress h)
254
{
255
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
256
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
257
MemOpIdx oi, TCGType data_type)
258
{
259
- MemOp memop = get_memop(oi);
260
- TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
261
+ TCGLabelQemuLdst *ldst;
262
HostAddress h;
263
264
- /* Byte swapping is left to middle-end expansion. */
265
- tcg_debug_assert((memop & MO_BSWAP) == 0);
266
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
267
+ tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
268
269
-#ifdef CONFIG_SOFTMMU
270
- tcg_insn_unit *label_ptr;
271
-
272
- tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 1);
273
-
274
- h = (HostAddress){
275
- .base = TCG_REG_X1,
276
- .index = addr_reg,
277
- .index_ext = addr_type
278
- };
279
- tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h);
280
-
281
- add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
282
- s->code_ptr, label_ptr);
283
-#else /* !CONFIG_SOFTMMU */
284
- unsigned a_bits = get_alignment_bits(memop);
285
- if (a_bits) {
286
- tcg_out_test_alignment(s, true, addr_reg, a_bits);
287
+ if (ldst) {
288
+ ldst->type = data_type;
289
+ ldst->datalo_reg = data_reg;
290
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
291
}
292
- if (USE_GUEST_BASE) {
293
- h = (HostAddress){
294
- .base = TCG_REG_GUEST_BASE,
295
- .index = addr_reg,
296
- .index_ext = addr_type
297
- };
298
- } else {
299
- h = (HostAddress){
300
- .base = addr_reg,
301
- .index = TCG_REG_XZR,
302
- .index_ext = TCG_TYPE_I64
303
- };
304
- }
305
- tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h);
306
-#endif /* CONFIG_SOFTMMU */
307
}
308
309
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
310
MemOpIdx oi, TCGType data_type)
311
{
312
- MemOp memop = get_memop(oi);
313
- TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
314
+ TCGLabelQemuLdst *ldst;
315
HostAddress h;
316
317
- /* Byte swapping is left to middle-end expansion. */
318
- tcg_debug_assert((memop & MO_BSWAP) == 0);
319
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
320
+ tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
321
322
-#ifdef CONFIG_SOFTMMU
323
- tcg_insn_unit *label_ptr;
324
-
325
- tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 0);
326
-
327
- h = (HostAddress){
328
- .base = TCG_REG_X1,
329
- .index = addr_reg,
330
- .index_ext = addr_type
331
- };
332
- tcg_out_qemu_st_direct(s, memop, data_reg, h);
333
-
334
- add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
335
- s->code_ptr, label_ptr);
336
-#else /* !CONFIG_SOFTMMU */
337
- unsigned a_bits = get_alignment_bits(memop);
338
- if (a_bits) {
339
- tcg_out_test_alignment(s, false, addr_reg, a_bits);
340
+ if (ldst) {
341
+ ldst->type = data_type;
342
+ ldst->datalo_reg = data_reg;
343
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
344
}
345
- if (USE_GUEST_BASE) {
346
- h = (HostAddress){
347
- .base = TCG_REG_GUEST_BASE,
348
- .index = addr_reg,
349
- .index_ext = addr_type
350
- };
351
- } else {
352
- h = (HostAddress){
353
- .base = addr_reg,
354
- .index = TCG_REG_XZR,
355
- .index_ext = TCG_TYPE_I64
356
- };
357
- }
358
- tcg_out_qemu_st_direct(s, memop, data_reg, h);
359
-#endif /* CONFIG_SOFTMMU */
360
}
361
362
static const tcg_insn_unit *tb_ret_addr;
363
--
364
2.34.1
365
366
New patch
1
Merge tcg_out_tlb_load, add_qemu_ldst_label, and some code that lived
2
in both tcg_out_qemu_ld and tcg_out_qemu_st into one function that
3
returns HostAddress and TCGLabelQemuLdst structures.
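
The comparison the merged function emits can be described in plain C roughly
as follows; this is a model under assumed 4 KiB pages and a comparator with
zeroed low bits, not the generated Arm code:

    #include <stdbool.h>
    #include <stdint.h>

    #define TARGET_PAGE_BITS 12                              /* assumed */
    #define TARGET_PAGE_MASK (~(uint32_t)((1u << TARGET_PAGE_BITS) - 1))

    /*
     * a_mask and s_mask are (1 << a_bits) - 1 and (1 << s_bits) - 1.
     * For an access wider than its guaranteed alignment, the page of the
     * last unit of alignment is tested, so a page-crossing access also
     * misses and is routed to the slow path.
     */
    static bool tlb_hit(uint32_t addr, uint32_t tlb_comparator,
                        unsigned a_mask, unsigned s_mask)
    {
        uint32_t t_addr = addr;

        if (a_mask < s_mask) {
            t_addr += s_mask - a_mask;   /* last unit of alignment */
        }
        if (addr & a_mask) {
            return false;                /* misaligned: slow path */
        }
        return (t_addr & TARGET_PAGE_MASK) == tlb_comparator;
    }

    int main(void)
    {
        /* a 4-byte access at 0x0ffe crosses the page, so it must miss */
        return tlb_hit(0x0ffe, 0x0000, 0, 3) ? 1 : 0;
    }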
1
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/arm/tcg-target.c.inc | 351 ++++++++++++++++++---------------------
9
1 file changed, 159 insertions(+), 192 deletions(-)
10
11
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/arm/tcg-target.c.inc
14
+++ b/tcg/arm/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
16
}
17
}
18
19
-#define TLB_SHIFT    (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
20
-
21
-/* We expect to use an 9-bit sign-magnitude negative offset from ENV. */
22
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
23
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
24
-
25
-/* These offsets are built into the LDRD below. */
26
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
27
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
28
-
29
-/* Load and compare a TLB entry, leaving the flags set. Returns the register
30
- containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
31
-
32
-static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
33
- MemOp opc, int mem_index, bool is_load)
34
-{
35
- int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
36
- : offsetof(CPUTLBEntry, addr_write));
37
- int fast_off = TLB_MASK_TABLE_OFS(mem_index);
38
- unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
39
- unsigned a_mask = (1 << get_alignment_bits(opc)) - 1;
40
- TCGReg t_addr;
41
-
42
- /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */
43
- tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
44
-
45
- /* Extract the tlb index from the address into R0. */
46
- tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
47
- SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
48
-
49
- /*
50
- * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
51
- * Load the tlb comparator into R2/R3 and the fast path addend into R1.
52
- */
53
- if (cmp_off == 0) {
54
- if (TARGET_LONG_BITS == 64) {
55
- tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
56
- } else {
57
- tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
58
- }
59
- } else {
60
- tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
61
- TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
62
- if (TARGET_LONG_BITS == 64) {
63
- tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
64
- } else {
65
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
66
- }
67
- }
68
-
69
- /* Load the tlb addend. */
70
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
71
- offsetof(CPUTLBEntry, addend));
72
-
73
- /*
74
- * Check alignment, check comparators.
75
- * Do this in 2-4 insns. Use MOVW for v7, if possible,
76
- * to reduce the number of sequential conditional instructions.
77
- * Almost all guests have at least 4k pages, which means that we need
78
- * to clear at least 9 bits even for an 8-byte memory, which means it
79
- * isn't worth checking for an immediate operand for BIC.
80
- *
81
- * For unaligned accesses, test the page of the last unit of alignment.
82
- * This leaves the least significant alignment bits unchanged, and of
83
- * course must be zero.
84
- */
85
- t_addr = addrlo;
86
- if (a_mask < s_mask) {
87
- t_addr = TCG_REG_R0;
88
- tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
89
- addrlo, s_mask - a_mask);
90
- }
91
- if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
92
- tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
93
- tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
94
- t_addr, TCG_REG_TMP, 0);
95
- tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
96
- } else {
97
- if (a_mask) {
98
- tcg_debug_assert(a_mask <= 0xff);
99
- tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
100
- }
101
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
102
- SHIFT_IMM_LSR(TARGET_PAGE_BITS));
103
- tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
104
- 0, TCG_REG_R2, TCG_REG_TMP,
105
- SHIFT_IMM_LSL(TARGET_PAGE_BITS));
106
- }
107
-
108
- if (TARGET_LONG_BITS == 64) {
109
- tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
110
- }
111
-
112
- return TCG_REG_R1;
113
-}
114
-
115
-/* Record the context of a call to the out of line helper code for the slow
116
- path for a load or store, so that we can later generate the correct
117
- helper code. */
118
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
119
- MemOpIdx oi, TCGType type,
120
- TCGReg datalo, TCGReg datahi,
121
- TCGReg addrlo, TCGReg addrhi,
122
- tcg_insn_unit *raddr,
123
- tcg_insn_unit *label_ptr)
124
-{
125
- TCGLabelQemuLdst *label = new_ldst_label(s);
126
-
127
- label->is_ld = is_ld;
128
- label->oi = oi;
129
- label->type = type;
130
- label->datalo_reg = datalo;
131
- label->datahi_reg = datahi;
132
- label->addrlo_reg = addrlo;
133
- label->addrhi_reg = addrhi;
134
- label->raddr = tcg_splitwx_to_rx(raddr);
135
- label->label_ptr[0] = label_ptr;
136
-}
137
-
138
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
139
{
140
TCGReg argreg;
141
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
142
return true;
143
}
144
#else
145
-
146
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
147
- TCGReg addrhi, unsigned a_bits)
148
-{
149
- unsigned a_mask = (1 << a_bits) - 1;
150
- TCGLabelQemuLdst *label = new_ldst_label(s);
151
-
152
- label->is_ld = is_ld;
153
- label->addrlo_reg = addrlo;
154
- label->addrhi_reg = addrhi;
155
-
156
- /* We are expecting a_bits to max out at 7, and can easily support 8. */
157
- tcg_debug_assert(a_mask <= 0xff);
158
- /* tst addr, #mask */
159
- tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
160
-
161
- /* blne slow_path */
162
- label->label_ptr[0] = s->code_ptr;
163
- tcg_out_bl_imm(s, COND_NE, 0);
164
-
165
- label->raddr = tcg_splitwx_to_rx(s->code_ptr);
166
-}
167
-
168
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
169
{
170
if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
171
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
172
}
173
#endif /* SOFTMMU */
174
175
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
176
+ TCGReg addrlo, TCGReg addrhi,
177
+ MemOpIdx oi, bool is_ld)
178
+{
179
+ TCGLabelQemuLdst *ldst = NULL;
180
+ MemOp opc = get_memop(oi);
181
+ MemOp a_bits = get_alignment_bits(opc);
182
+ unsigned a_mask = (1 << a_bits) - 1;
183
+
184
+#ifdef CONFIG_SOFTMMU
185
+ int mem_index = get_mmuidx(oi);
186
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
187
+ : offsetof(CPUTLBEntry, addr_write);
188
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
189
+ unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
190
+ TCGReg t_addr;
191
+
192
+ ldst = new_ldst_label(s);
193
+ ldst->is_ld = is_ld;
194
+ ldst->oi = oi;
195
+ ldst->addrlo_reg = addrlo;
196
+ ldst->addrhi_reg = addrhi;
197
+
198
+ /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */
199
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
200
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
201
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
202
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
203
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
204
+
205
+ /* Extract the tlb index from the address into R0. */
206
+ tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
207
+ SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
208
+
209
+ /*
210
+ * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
211
+ * Load the tlb comparator into R2/R3 and the fast path addend into R1.
212
+ */
213
+ if (cmp_off == 0) {
214
+ if (TARGET_LONG_BITS == 64) {
215
+ tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
216
+ } else {
217
+ tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
218
+ }
219
+ } else {
220
+ tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
221
+ TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
222
+ if (TARGET_LONG_BITS == 64) {
223
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
224
+ } else {
225
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
226
+ }
227
+ }
228
+
229
+ /* Load the tlb addend. */
230
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
231
+ offsetof(CPUTLBEntry, addend));
232
+
233
+ /*
234
+ * Check alignment, check comparators.
235
+ * Do this in 2-4 insns. Use MOVW for v7, if possible,
236
+ * to reduce the number of sequential conditional instructions.
237
+ * Almost all guests have at least 4k pages, which means that we need
238
+ * to clear at least 9 bits even for an 8-byte memory, which means it
239
+ * isn't worth checking for an immediate operand for BIC.
240
+ *
241
+ * For unaligned accesses, test the page of the last unit of alignment.
242
+ * This leaves the least significant alignment bits unchanged, and of
243
+ * course must be zero.
244
+ */
245
+ t_addr = addrlo;
246
+ if (a_mask < s_mask) {
247
+ t_addr = TCG_REG_R0;
248
+ tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
249
+ addrlo, s_mask - a_mask);
250
+ }
251
+ if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
252
+ tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
253
+ tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
254
+ t_addr, TCG_REG_TMP, 0);
255
+ tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
256
+ } else {
257
+ if (a_mask) {
258
+ tcg_debug_assert(a_mask <= 0xff);
259
+ tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
260
+ }
261
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
262
+ SHIFT_IMM_LSR(TARGET_PAGE_BITS));
263
+ tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
264
+ 0, TCG_REG_R2, TCG_REG_TMP,
265
+ SHIFT_IMM_LSL(TARGET_PAGE_BITS));
266
+ }
267
+
268
+ if (TARGET_LONG_BITS == 64) {
269
+ tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
270
+ }
271
+
272
+ *h = (HostAddress){
273
+ .cond = COND_AL,
274
+ .base = addrlo,
275
+ .index = TCG_REG_R1,
276
+ .index_scratch = true,
277
+ };
278
+#else
279
+ if (a_mask) {
280
+ ldst = new_ldst_label(s);
281
+ ldst->is_ld = is_ld;
282
+ ldst->oi = oi;
283
+ ldst->addrlo_reg = addrlo;
284
+ ldst->addrhi_reg = addrhi;
285
+
286
+ /* We are expecting a_bits to max out at 7 */
287
+ tcg_debug_assert(a_mask <= 0xff);
288
+ /* tst addr, #mask */
289
+ tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
290
+ }
291
+
292
+ *h = (HostAddress){
293
+ .cond = COND_AL,
294
+ .base = addrlo,
295
+ .index = guest_base ? TCG_REG_GUEST_BASE : -1,
296
+ .index_scratch = false,
297
+ };
298
+#endif
299
+
300
+ return ldst;
301
+}
302
+
303
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
304
TCGReg datahi, HostAddress h)
305
{
306
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
307
MemOpIdx oi, TCGType data_type)
308
{
309
MemOp opc = get_memop(oi);
310
+ TCGLabelQemuLdst *ldst;
311
HostAddress h;
312
313
-#ifdef CONFIG_SOFTMMU
314
- h.cond = COND_AL;
315
- h.base = addrlo;
316
- h.index_scratch = true;
317
- h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 1);
318
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
319
+ if (ldst) {
320
+ ldst->type = data_type;
321
+ ldst->datalo_reg = datalo;
322
+ ldst->datahi_reg = datahi;
323
324
- /*
325
- * This a conditional BL only to load a pointer within this opcode into
326
- * LR for the slow path. We will not be using the value for a tail call.
327
- */
328
- tcg_insn_unit *label_ptr = s->code_ptr;
329
- tcg_out_bl_imm(s, COND_NE, 0);
330
+ /*
331
+ * This a conditional BL only to load a pointer within this
332
+ * opcode into LR for the slow path. We will not be using
333
+ * the value for a tail call.
334
+ */
335
+ ldst->label_ptr[0] = s->code_ptr;
336
+ tcg_out_bl_imm(s, COND_NE, 0);
337
338
- tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
339
-
340
- add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi,
341
- addrlo, addrhi, s->code_ptr, label_ptr);
342
-#else
343
- unsigned a_bits = get_alignment_bits(opc);
344
- if (a_bits) {
345
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
346
+ tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
347
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
348
+ } else {
349
+ tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
350
}
351
-
352
- h.cond = COND_AL;
353
- h.base = addrlo;
354
- h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
355
- h.index_scratch = false;
356
- tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
357
-#endif
358
}
359
360
static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
361
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
362
MemOpIdx oi, TCGType data_type)
363
{
364
MemOp opc = get_memop(oi);
365
+ TCGLabelQemuLdst *ldst;
366
HostAddress h;
367
368
-#ifdef CONFIG_SOFTMMU
369
- h.cond = COND_EQ;
370
- h.base = addrlo;
371
- h.index_scratch = true;
372
- h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 0);
373
- tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
374
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
375
+ if (ldst) {
376
+ ldst->type = data_type;
377
+ ldst->datalo_reg = datalo;
378
+ ldst->datahi_reg = datahi;
379
380
- /* The conditional call must come last, as we're going to return here. */
381
- tcg_insn_unit *label_ptr = s->code_ptr;
382
- tcg_out_bl_imm(s, COND_NE, 0);
383
-
384
- add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi,
385
- addrlo, addrhi, s->code_ptr, label_ptr);
386
-#else
387
- unsigned a_bits = get_alignment_bits(opc);
388
-
389
- h.cond = COND_AL;
390
- if (a_bits) {
391
- tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
392
h.cond = COND_EQ;
393
- }
394
+ tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
395
396
- h.base = addrlo;
397
- h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
398
- h.index_scratch = false;
399
- tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
400
-#endif
401
+ /* The conditional call is last, as we're going to return here. */
402
+ ldst->label_ptr[0] = s->code_ptr;
403
+ tcg_out_bl_imm(s, COND_NE, 0);
404
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
405
+ } else {
406
+ tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
407
+ }
408
}
409
410
static void tcg_out_epilogue(TCGContext *s);
411
--
412
2.34.1
413
414
New patch
1
Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment,
2
tcg_out_zext_addr_if_32_bit, and some code that lived in both
3
tcg_out_qemu_ld and tcg_out_qemu_st into one function that returns
4
HostAddress and TCGLabelQemuLdst structures.
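
The folded-in tcg_out_zext_addr_if_32_bit step amounts to the following, shown
as a plain C approximation (TARGET_LONG_BITS is pinned to 32 here purely for
the example):

    #include <stdint.h>

    #define TARGET_LONG_BITS 32    /* assumption for this example only */

    /*
     * A 32-bit guest address is zero-extended into a temporary before the
     * TLB addend (softmmu) or guest_base register (user-only) is added;
     * a 64-bit guest address is used unchanged.
     */
    static uint64_t canonical_guest_addr(uint64_t addr_reg_value)
    {
        if (TARGET_LONG_BITS == 32) {
            return (uint32_t)addr_reg_value;    /* bstrpick.d 31..0 */
        }
        return addr_reg_value;
    }

    static uint64_t host_address(uint64_t addr_reg_value, uint64_t addend)
    {
        return canonical_guest_addr(addr_reg_value) + addend;
    }

    int main(void)
    {
        /* high garbage in the address register is discarded */
        return host_address(0xffffffff80001000ull, 0x10000) == 0x80011000ull
               ? 0 : 1;
    }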
1
5
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/loongarch64/tcg-target.c.inc | 255 +++++++++++++------------------
10
1 file changed, 105 insertions(+), 150 deletions(-)
11
12
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/loongarch64/tcg-target.c.inc
15
+++ b/tcg/loongarch64/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[4] = {
17
[MO_64] = helper_le_stq_mmu,
18
};
19
20
-/* We expect to use a 12-bit negative offset from ENV. */
21
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
22
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
23
-
24
static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
25
{
26
tcg_out_opc_b(s, 0);
27
return reloc_br_sd10k16(s->code_ptr - 1, target);
28
}
29
30
-/*
31
- * Emits common code for TLB addend lookup, that eventually loads the
32
- * addend in TCG_REG_TMP2.
33
- */
34
-static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, MemOpIdx oi,
35
- tcg_insn_unit **label_ptr, bool is_load)
36
-{
37
- MemOp opc = get_memop(oi);
38
- unsigned s_bits = opc & MO_SIZE;
39
- unsigned a_bits = get_alignment_bits(opc);
40
- tcg_target_long compare_mask;
41
- int mem_index = get_mmuidx(oi);
42
- int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
43
- int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
44
- int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
45
-
46
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
47
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
48
-
49
- tcg_out_opc_srli_d(s, TCG_REG_TMP2, addrl,
50
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
51
- tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
52
- tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
53
-
54
- /* Load the tlb comparator and the addend. */
55
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
56
- is_load ? offsetof(CPUTLBEntry, addr_read)
57
- : offsetof(CPUTLBEntry, addr_write));
58
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
59
- offsetof(CPUTLBEntry, addend));
60
-
61
- /* We don't support unaligned accesses. */
62
- if (a_bits < s_bits) {
63
- a_bits = s_bits;
64
- }
65
- /* Clear the non-page, non-alignment bits from the address. */
66
- compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
67
- tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
68
- tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addrl);
69
-
70
- /* Compare masked address with the TLB entry. */
71
- label_ptr[0] = s->code_ptr;
72
- tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);
73
-
74
- /* TLB Hit - addend in TCG_REG_TMP2, ready for use. */
75
-}
76
-
77
-static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
78
- TCGType type,
79
- TCGReg datalo, TCGReg addrlo,
80
- void *raddr, tcg_insn_unit **label_ptr)
81
-{
82
- TCGLabelQemuLdst *label = new_ldst_label(s);
83
-
84
- label->is_ld = is_ld;
85
- label->oi = oi;
86
- label->type = type;
87
- label->datalo_reg = datalo;
88
- label->datahi_reg = 0; /* unused */
89
- label->addrlo_reg = addrlo;
90
- label->addrhi_reg = 0; /* unused */
91
- label->raddr = tcg_splitwx_to_rx(raddr);
92
- label->label_ptr[0] = label_ptr[0];
93
-}
94
-
95
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
96
{
97
MemOpIdx oi = l->oi;
98
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
99
return tcg_out_goto(s, l->raddr);
100
}
101
#else
102
-
103
-/*
104
- * Alignment helpers for user-mode emulation
105
- */
106
-
107
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
108
- unsigned a_bits)
109
-{
110
- TCGLabelQemuLdst *l = new_ldst_label(s);
111
-
112
- l->is_ld = is_ld;
113
- l->addrlo_reg = addr_reg;
114
-
115
- /*
116
- * Without micro-architecture details, we don't know which of bstrpick or
117
- * andi is faster, so use bstrpick as it's not constrained by imm field
118
- * width. (Not to say alignments >= 2^12 are going to happen any time
119
- * soon, though)
120
- */
121
- tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1);
122
-
123
- l->label_ptr[0] = s->code_ptr;
124
- tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0);
125
-
126
- l->raddr = tcg_splitwx_to_rx(s->code_ptr);
127
-}
128
-
129
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
130
{
131
/* resolve label address */
132
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
133
134
#endif /* CONFIG_SOFTMMU */
135
136
-/*
137
- * `ext32u` the address register into the temp register given,
138
- * if target is 32-bit, no-op otherwise.
139
- *
140
- * Returns the address register ready for use with TLB addend.
141
- */
142
-static TCGReg tcg_out_zext_addr_if_32_bit(TCGContext *s,
143
- TCGReg addr, TCGReg tmp)
144
-{
145
- if (TARGET_LONG_BITS == 32) {
146
- tcg_out_ext32u(s, tmp, addr);
147
- return tmp;
148
- }
149
- return addr;
150
-}
151
-
152
typedef struct {
153
TCGReg base;
154
TCGReg index;
155
} HostAddress;
156
157
+/*
158
+ * For softmmu, perform the TLB load and compare.
159
+ * For useronly, perform any required alignment tests.
160
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
161
+ * is required and fill in @h with the host address for the fast path.
162
+ */
163
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
164
+ TCGReg addr_reg, MemOpIdx oi,
165
+ bool is_ld)
166
+{
167
+ TCGLabelQemuLdst *ldst = NULL;
168
+ MemOp opc = get_memop(oi);
169
+ unsigned a_bits = get_alignment_bits(opc);
170
+
171
+#ifdef CONFIG_SOFTMMU
172
+ unsigned s_bits = opc & MO_SIZE;
173
+ int mem_index = get_mmuidx(oi);
174
+ int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
175
+ int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
176
+ int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
177
+ tcg_target_long compare_mask;
178
+
179
+ ldst = new_ldst_label(s);
180
+ ldst->is_ld = is_ld;
181
+ ldst->oi = oi;
182
+ ldst->addrlo_reg = addr_reg;
183
+
184
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
185
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
186
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
187
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
188
+
189
+ tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg,
190
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
191
+ tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
192
+ tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
193
+
194
+ /* Load the tlb comparator and the addend. */
195
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
196
+ is_ld ? offsetof(CPUTLBEntry, addr_read)
197
+ : offsetof(CPUTLBEntry, addr_write));
198
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
199
+ offsetof(CPUTLBEntry, addend));
200
+
201
+ /* We don't support unaligned accesses. */
202
+ if (a_bits < s_bits) {
203
+ a_bits = s_bits;
204
+ }
205
+ /* Clear the non-page, non-alignment bits from the address. */
206
+ compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
207
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
208
+ tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
209
+
210
+ /* Compare masked address with the TLB entry. */
211
+ ldst->label_ptr[0] = s->code_ptr;
212
+ tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);
213
+
214
+ h->index = TCG_REG_TMP2;
215
+#else
216
+ if (a_bits) {
217
+ ldst = new_ldst_label(s);
218
+
219
+ ldst->is_ld = is_ld;
220
+ ldst->oi = oi;
221
+ ldst->addrlo_reg = addr_reg;
222
+
223
+ /*
224
+ * Without micro-architecture details, we don't know which of
225
+ * bstrpick or andi is faster, so use bstrpick as it's not
226
+ * constrained by imm field width. Not to say alignments >= 2^12
227
+ * are going to happen any time soon.
228
+ */
229
+ tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1);
230
+
231
+ ldst->label_ptr[0] = s->code_ptr;
232
+ tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0);
233
+ }
234
+
235
+ h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
236
+#endif
237
+
238
+ if (TARGET_LONG_BITS == 32) {
239
+ h->base = TCG_REG_TMP0;
240
+ tcg_out_ext32u(s, h->base, addr_reg);
241
+ } else {
242
+ h->base = addr_reg;
243
+ }
244
+
245
+ return ldst;
246
+}
247
+
248
static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type,
249
TCGReg rd, HostAddress h)
250
{
251
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type,
252
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
253
MemOpIdx oi, TCGType data_type)
254
{
255
- MemOp opc = get_memop(oi);
256
+ TCGLabelQemuLdst *ldst;
257
HostAddress h;
258
259
-#ifdef CONFIG_SOFTMMU
260
- tcg_insn_unit *label_ptr[1];
261
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
262
+ tcg_out_qemu_ld_indexed(s, get_memop(oi), data_type, data_reg, h);
263
264
- tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1);
265
- h.index = TCG_REG_TMP2;
266
-#else
267
- unsigned a_bits = get_alignment_bits(opc);
268
- if (a_bits) {
269
- tcg_out_test_alignment(s, true, addr_reg, a_bits);
270
+ if (ldst) {
271
+ ldst->type = data_type;
272
+ ldst->datalo_reg = data_reg;
273
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
274
}
275
- h.index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
276
-#endif
277
-
278
- h.base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0);
279
- tcg_out_qemu_ld_indexed(s, opc, data_type, data_reg, h);
280
-
281
-#ifdef CONFIG_SOFTMMU
282
- add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
283
- s->code_ptr, label_ptr);
284
-#endif
285
}
286
287
static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc,
288
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc,
289
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
290
MemOpIdx oi, TCGType data_type)
291
{
292
- MemOp opc = get_memop(oi);
293
+ TCGLabelQemuLdst *ldst;
294
HostAddress h;
295
296
-#ifdef CONFIG_SOFTMMU
297
- tcg_insn_unit *label_ptr[1];
298
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
299
+ tcg_out_qemu_st_indexed(s, get_memop(oi), data_reg, h);
300
301
- tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0);
302
- h.index = TCG_REG_TMP2;
303
-#else
304
- unsigned a_bits = get_alignment_bits(opc);
305
- if (a_bits) {
306
- tcg_out_test_alignment(s, false, addr_reg, a_bits);
307
+ if (ldst) {
308
+ ldst->type = data_type;
309
+ ldst->datalo_reg = data_reg;
310
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
311
}
312
- h.index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
313
-#endif
314
-
315
- h.base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0);
316
- tcg_out_qemu_st_indexed(s, opc, data_reg, h);
317
-
318
-#ifdef CONFIG_SOFTMMU
319
- add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
320
- s->code_ptr, label_ptr);
321
-#endif
322
}
323
324
/*
325
--
326
2.34.1
327
328
New patch
1
Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment,
2
and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st
3
into one function that returns HostAddress and TCGLabelQemuLdst structures.
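
For mips the HostAddress also records the known alignment, and the callers
pick between the direct and the lwl/lwr paths from it. A rough standalone C
rendering of that decision (the emit functions are placeholders, not the
backend):

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct {
        int base;          /* register holding the complete host address */
        unsigned align;    /* log2 of the guaranteed alignment (a_bits) */
    } HostAddress;

    static void emit_ld_direct(HostAddress h)
    {
        printf("lw      rD, 0(r%d)\n", h.base);
    }

    static void emit_ld_unalign(HostAddress h)
    {
        printf("lwl/lwr rD, (r%d)\n", h.base);
    }

    /*
     * R6 removed lwl/lwr but requires misaligned accesses to work, so it
     * always takes the direct path; pre-R6 does so only when the known
     * alignment covers the access size (size_log2).
     */
    static void emit_qemu_ld(HostAddress h, unsigned size_log2, bool use_r6)
    {
        if (use_r6 || h.align >= size_log2) {
            emit_ld_direct(h);
        } else {
            emit_ld_unalign(h);
        }
    }

    int main(void)
    {
        HostAddress h = { .base = 4, .align = 0 };
        emit_qemu_ld(h, 2, false);   /* pre-R6, unknown alignment */
        emit_qemu_ld(h, 2, true);    /* R6 */
        return 0;
    }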
1
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/mips/tcg-target.c.inc | 404 ++++++++++++++++----------------------
9
1 file changed, 172 insertions(+), 232 deletions(-)
10
11
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/mips/tcg-target.c.inc
14
+++ b/tcg/mips/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah)
16
return i;
17
}
18
19
-/* We expect to use a 16-bit negative offset from ENV. */
20
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
21
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
22
-
23
-/*
24
- * Perform the tlb comparison operation.
25
- * The complete host address is placed in BASE.
26
- * Clobbers TMP0, TMP1, TMP2, TMP3.
27
- */
28
-static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
29
- TCGReg addrh, MemOpIdx oi,
30
- tcg_insn_unit *label_ptr[2], bool is_load)
31
-{
32
- MemOp opc = get_memop(oi);
33
- unsigned a_bits = get_alignment_bits(opc);
34
- unsigned s_bits = opc & MO_SIZE;
35
- unsigned a_mask = (1 << a_bits) - 1;
36
- unsigned s_mask = (1 << s_bits) - 1;
37
- int mem_index = get_mmuidx(oi);
38
- int fast_off = TLB_MASK_TABLE_OFS(mem_index);
39
- int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
40
- int table_off = fast_off + offsetof(CPUTLBDescFast, table);
41
- int add_off = offsetof(CPUTLBEntry, addend);
42
- int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
43
- : offsetof(CPUTLBEntry, addr_write));
44
- target_ulong tlb_mask;
45
-
46
- /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
47
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
48
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
49
-
50
- /* Extract the TLB index from the address into TMP3. */
51
- tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrl,
52
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
53
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
54
-
55
- /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */
56
- tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
57
-
58
- /* Load the (low-half) tlb comparator. */
59
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
60
- tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
61
- } else {
62
- tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD
63
- : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW),
64
- TCG_TMP0, TCG_TMP3, cmp_off);
65
- }
66
-
67
- /* Zero extend a 32-bit guest address for a 64-bit host. */
68
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
69
- tcg_out_ext32u(s, base, addrl);
70
- addrl = base;
71
- }
72
-
73
- /*
74
- * Mask the page bits, keeping the alignment bits to compare against.
75
- * For unaligned accesses, compare against the end of the access to
76
- * verify that it does not cross a page boundary.
77
- */
78
- tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
79
- tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask);
80
- if (a_mask >= s_mask) {
81
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
82
- } else {
83
- tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrl, s_mask - a_mask);
84
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
85
- }
86
-
87
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
88
- /* Load the tlb addend for the fast path. */
89
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
90
- }
91
-
92
- label_ptr[0] = s->code_ptr;
93
- tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
94
-
95
- /* Load and test the high half tlb comparator. */
96
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
97
- /* delay slot */
98
- tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
99
-
100
- /* Load the tlb addend for the fast path. */
101
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
102
-
103
- label_ptr[1] = s->code_ptr;
104
- tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0);
105
- }
106
-
107
- /* delay slot */
108
- tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrl);
109
-}
110
-
111
-static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
112
- TCGType ext,
113
- TCGReg datalo, TCGReg datahi,
114
- TCGReg addrlo, TCGReg addrhi,
115
- void *raddr, tcg_insn_unit *label_ptr[2])
116
-{
117
- TCGLabelQemuLdst *label = new_ldst_label(s);
118
-
119
- label->is_ld = is_ld;
120
- label->oi = oi;
121
- label->type = ext;
122
- label->datalo_reg = datalo;
123
- label->datahi_reg = datahi;
124
- label->addrlo_reg = addrlo;
125
- label->addrhi_reg = addrhi;
126
- label->raddr = tcg_splitwx_to_rx(raddr);
127
- label->label_ptr[0] = label_ptr[0];
128
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
129
- label->label_ptr[1] = label_ptr[1];
130
- }
131
-}
132
-
133
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
134
{
135
const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
136
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
137
}
138
139
#else
140
-
141
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
142
- TCGReg addrhi, unsigned a_bits)
143
-{
144
- unsigned a_mask = (1 << a_bits) - 1;
145
- TCGLabelQemuLdst *l = new_ldst_label(s);
146
-
147
- l->is_ld = is_ld;
148
- l->addrlo_reg = addrlo;
149
- l->addrhi_reg = addrhi;
150
-
151
- /* We are expecting a_bits to max out at 7, much lower than ANDI. */
152
- tcg_debug_assert(a_bits < 16);
153
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask);
154
-
155
- l->label_ptr[0] = s->code_ptr;
156
- if (use_mips32r6_instructions) {
157
- tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0);
158
- } else {
159
- tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO);
160
- tcg_out_nop(s);
161
- }
162
-
163
- l->raddr = tcg_splitwx_to_rx(s->code_ptr);
164
-}
165
-
166
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
167
{
168
void *target;
169
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
170
}
171
#endif /* SOFTMMU */
172
173
+typedef struct {
174
+ TCGReg base;
175
+ MemOp align;
176
+} HostAddress;
177
+
178
+/*
179
+ * For softmmu, perform the TLB load and compare.
180
+ * For useronly, perform any required alignment tests.
181
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
182
+ * is required and fill in @h with the host address for the fast path.
183
+ */
184
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
185
+ TCGReg addrlo, TCGReg addrhi,
186
+ MemOpIdx oi, bool is_ld)
187
+{
188
+ TCGLabelQemuLdst *ldst = NULL;
189
+ MemOp opc = get_memop(oi);
190
+ unsigned a_bits = get_alignment_bits(opc);
191
+ unsigned s_bits = opc & MO_SIZE;
192
+ unsigned a_mask = (1 << a_bits) - 1;
193
+ TCGReg base;
194
+
195
+#ifdef CONFIG_SOFTMMU
196
+ unsigned s_mask = (1 << s_bits) - 1;
197
+ int mem_index = get_mmuidx(oi);
198
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
199
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
200
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
201
+ int add_off = offsetof(CPUTLBEntry, addend);
202
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
203
+ : offsetof(CPUTLBEntry, addr_write);
204
+ target_ulong tlb_mask;
205
+
206
+ ldst = new_ldst_label(s);
207
+ ldst->is_ld = is_ld;
208
+ ldst->oi = oi;
209
+ ldst->addrlo_reg = addrlo;
210
+ ldst->addrhi_reg = addrhi;
211
+ base = TCG_REG_A0;
212
+
213
+ /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
214
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
215
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
216
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
217
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
218
+
219
+ /* Extract the TLB index from the address into TMP3. */
220
+ tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrlo,
221
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
222
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
223
+
224
+ /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */
225
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
226
+
227
+ /* Load the (low-half) tlb comparator. */
228
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
229
+ tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
230
+ } else {
231
+ tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD
232
+ : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW),
233
+ TCG_TMP0, TCG_TMP3, cmp_off);
234
+ }
235
+
236
+ /* Zero extend a 32-bit guest address for a 64-bit host. */
237
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
238
+ tcg_out_ext32u(s, base, addrlo);
239
+ addrlo = base;
240
+ }
241
+
242
+ /*
243
+ * Mask the page bits, keeping the alignment bits to compare against.
244
+ * For unaligned accesses, compare against the end of the access to
245
+ * verify that it does not cross a page boundary.
246
+ */
247
+ tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
248
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask);
249
+ if (a_mask >= s_mask) {
250
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
251
+ } else {
252
+ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrlo, s_mask - a_mask);
253
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
254
+ }
255
+
256
+ if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
257
+ /* Load the tlb addend for the fast path. */
258
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
259
+ }
260
+
261
+ ldst->label_ptr[0] = s->code_ptr;
262
+ tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
263
+
264
+ /* Load and test the high half tlb comparator. */
265
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
266
+ /* delay slot */
267
+ tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
268
+
269
+ /* Load the tlb addend for the fast path. */
270
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
271
+
272
+ ldst->label_ptr[1] = s->code_ptr;
273
+ tcg_out_opc_br(s, OPC_BNE, addrhi, TCG_TMP0);
274
+ }
275
+
276
+ /* delay slot */
277
+ tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrlo);
278
+#else
279
+ if (a_mask && (use_mips32r6_instructions || a_bits != s_bits)) {
280
+ ldst = new_ldst_label(s);
281
+
282
+ ldst->is_ld = is_ld;
283
+ ldst->oi = oi;
284
+ ldst->addrlo_reg = addrlo;
285
+ ldst->addrhi_reg = addrhi;
286
+
287
+ /* We are expecting a_bits to max out at 7, much lower than ANDI. */
288
+ tcg_debug_assert(a_bits < 16);
289
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask);
290
+
291
+ ldst->label_ptr[0] = s->code_ptr;
292
+ if (use_mips32r6_instructions) {
293
+ tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0);
294
+ } else {
295
+ tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO);
296
+ tcg_out_nop(s);
297
+ }
298
+ }
299
+
300
+ base = addrlo;
301
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
302
+ tcg_out_ext32u(s, TCG_REG_A0, base);
303
+ base = TCG_REG_A0;
304
+ }
305
+ if (guest_base) {
306
+ if (guest_base == (int16_t)guest_base) {
307
+ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
308
+ } else {
309
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
310
+ TCG_GUEST_BASE_REG);
311
+ }
312
+ base = TCG_REG_A0;
313
+ }
314
+#endif
315
+
316
+ h->base = base;
317
+ h->align = a_bits;
318
+ return ldst;
319
+}
320
+
321
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
322
TCGReg base, MemOp opc, TCGType type)
323
{
324
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
325
MemOpIdx oi, TCGType data_type)
326
{
327
MemOp opc = get_memop(oi);
328
- unsigned a_bits = get_alignment_bits(opc);
329
- unsigned s_bits = opc & MO_SIZE;
330
- TCGReg base;
331
+ TCGLabelQemuLdst *ldst;
332
+ HostAddress h;
333
334
- /*
335
- * R6 removes the left/right instructions but requires the
336
- * system to support misaligned memory accesses.
337
- */
338
-#if defined(CONFIG_SOFTMMU)
339
- tcg_insn_unit *label_ptr[2];
340
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
341
342
- base = TCG_REG_A0;
343
- tcg_out_tlb_load(s, base, addrlo, addrhi, oi, label_ptr, 1);
344
- if (use_mips32r6_instructions || a_bits >= s_bits) {
345
- tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type);
346
+ if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
347
+ tcg_out_qemu_ld_direct(s, datalo, datahi, h.base, opc, data_type);
348
} else {
349
- tcg_out_qemu_ld_unalign(s, datalo, datahi, base, opc, data_type);
350
+ tcg_out_qemu_ld_unalign(s, datalo, datahi, h.base, opc, data_type);
351
}
352
- add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi,
353
- addrlo, addrhi, s->code_ptr, label_ptr);
354
-#else
355
- base = addrlo;
356
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
357
- tcg_out_ext32u(s, TCG_REG_A0, base);
358
- base = TCG_REG_A0;
359
+
360
+ if (ldst) {
361
+ ldst->type = data_type;
362
+ ldst->datalo_reg = datalo;
363
+ ldst->datahi_reg = datahi;
364
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
365
}
366
- if (guest_base) {
367
- if (guest_base == (int16_t)guest_base) {
368
- tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
369
- } else {
370
- tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
371
- TCG_GUEST_BASE_REG);
372
- }
373
- base = TCG_REG_A0;
374
- }
375
- if (use_mips32r6_instructions) {
376
- if (a_bits) {
377
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
378
- }
379
- tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type);
380
- } else {
381
- if (a_bits && a_bits != s_bits) {
382
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
383
- }
384
- if (a_bits >= s_bits) {
385
- tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type);
386
- } else {
387
- tcg_out_qemu_ld_unalign(s, datalo, datahi, base, opc, data_type);
388
- }
389
- }
390
-#endif
391
}
392
393
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
394
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
395
MemOpIdx oi, TCGType data_type)
396
{
397
MemOp opc = get_memop(oi);
398
- unsigned a_bits = get_alignment_bits(opc);
399
- unsigned s_bits = opc & MO_SIZE;
400
- TCGReg base;
401
+ TCGLabelQemuLdst *ldst;
402
+ HostAddress h;
403
404
- /*
405
- * R6 removes the left/right instructions but requires the
406
- * system to support misaligned memory accesses.
407
- */
408
-#if defined(CONFIG_SOFTMMU)
409
- tcg_insn_unit *label_ptr[2];
410
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
411
412
- base = TCG_REG_A0;
413
- tcg_out_tlb_load(s, base, addrlo, addrhi, oi, label_ptr, 0);
414
- if (use_mips32r6_instructions || a_bits >= s_bits) {
415
- tcg_out_qemu_st_direct(s, datalo, datahi, base, opc);
416
+ if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
417
+ tcg_out_qemu_st_direct(s, datalo, datahi, h.base, opc);
418
} else {
419
- tcg_out_qemu_st_unalign(s, datalo, datahi, base, opc);
420
+ tcg_out_qemu_st_unalign(s, datalo, datahi, h.base, opc);
421
}
422
- add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi,
423
- addrlo, addrhi, s->code_ptr, label_ptr);
424
-#else
425
- base = addrlo;
426
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
427
- tcg_out_ext32u(s, TCG_REG_A0, base);
428
- base = TCG_REG_A0;
429
+
430
+ if (ldst) {
431
+ ldst->type = data_type;
432
+ ldst->datalo_reg = datalo;
433
+ ldst->datahi_reg = datahi;
434
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
435
}
436
- if (guest_base) {
437
- if (guest_base == (int16_t)guest_base) {
438
- tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
439
- } else {
440
- tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
441
- TCG_GUEST_BASE_REG);
442
- }
443
- base = TCG_REG_A0;
444
- }
445
- if (use_mips32r6_instructions) {
446
- if (a_bits) {
447
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
448
- }
449
- tcg_out_qemu_st_direct(s, datalo, datahi, base, opc);
450
- } else {
451
- if (a_bits && a_bits != s_bits) {
452
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
453
- }
454
- if (a_bits >= s_bits) {
455
- tcg_out_qemu_st_direct(s, datalo, datahi, base, opc);
456
- } else {
457
- tcg_out_qemu_st_unalign(s, datalo, datahi, base, opc);
458
- }
459
- }
460
-#endif
461
}
462
463
static void tcg_out_mb(TCGContext *s, TCGArg a0)
464
--
465
2.34.1
466
467
New patch
1
Merge tcg_out_tlb_read, add_qemu_ldst_label, tcg_out_test_alignment,
2
and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st
3
into one function that returns HostAddress and TCGLabelQemuLdst structures.
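
In outline, every backend converted this way ends up with the same fast-path
shape; the following is a condensed sketch of the pattern from the diff below,
with the backend-specific load/store emission elided:

    TCGLabelQemuLdst *ldst;
    HostAddress h;

    /* TLB load + compare (softmmu) or alignment test (user-only). */
    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);

    /* ... emit the fast-path access through h.base / h.index ... */

    /* If a slow path was generated, record what it needs to restart. */
    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
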
1
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/ppc/tcg-target.c.inc | 381 ++++++++++++++++++---------------------
9
1 file changed, 172 insertions(+), 209 deletions(-)
10
11
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.c.inc
14
+++ b/tcg/ppc/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
16
[MO_BEUQ] = helper_be_stq_mmu,
17
};
18
19
-/* We expect to use a 16-bit negative offset from ENV. */
20
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
21
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
22
-
23
-/* Perform the TLB load and compare. Places the result of the comparison
24
- in CR7, loads the addend of the TLB into R3, and returns the register
25
- containing the guest address (zero-extended into R4). Clobbers R0 and R2. */
26
-
27
-static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
28
- TCGReg addrlo, TCGReg addrhi,
29
- int mem_index, bool is_read)
30
-{
31
- int cmp_off
32
- = (is_read
33
- ? offsetof(CPUTLBEntry, addr_read)
34
- : offsetof(CPUTLBEntry, addr_write));
35
- int fast_off = TLB_MASK_TABLE_OFS(mem_index);
36
- int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
37
- int table_off = fast_off + offsetof(CPUTLBDescFast, table);
38
- unsigned s_bits = opc & MO_SIZE;
39
- unsigned a_bits = get_alignment_bits(opc);
40
-
41
- /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
42
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
43
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
44
-
45
- /* Extract the page index, shifted into place for tlb index. */
46
- if (TCG_TARGET_REG_BITS == 32) {
47
- tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
48
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
49
- } else {
50
- tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
51
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
52
- }
53
- tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
54
-
55
- /* Load the TLB comparator. */
56
- if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
57
- uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
58
- ? LWZUX : LDUX);
59
- tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
60
- } else {
61
- tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
62
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
63
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
64
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
65
- } else {
66
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
67
- }
68
- }
69
-
70
- /* Load the TLB addend for use on the fast path. Do this asap
71
- to minimize any load use delay. */
72
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
73
- offsetof(CPUTLBEntry, addend));
74
-
75
- /* Clear the non-page, non-alignment bits from the address */
76
- if (TCG_TARGET_REG_BITS == 32) {
77
- /* We don't support unaligned accesses on 32-bits.
78
- * Preserve the bottom bits and thus trigger a comparison
79
- * failure on unaligned accesses.
80
- */
81
- if (a_bits < s_bits) {
82
- a_bits = s_bits;
83
- }
84
- tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
85
- (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
86
- } else {
87
- TCGReg t = addrlo;
88
-
89
- /* If the access is unaligned, we need to make sure we fail if we
90
- * cross a page boundary. The trick is to add the access size-1
91
- * to the address before masking the low bits. That will make the
92
- * address overflow to the next page if we cross a page boundary,
93
- * which will then force a mismatch of the TLB compare.
94
- */
95
- if (a_bits < s_bits) {
96
- unsigned a_mask = (1 << a_bits) - 1;
97
- unsigned s_mask = (1 << s_bits) - 1;
98
- tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
99
- t = TCG_REG_R0;
100
- }
101
-
102
- /* Mask the address for the requested alignment. */
103
- if (TARGET_LONG_BITS == 32) {
104
- tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
105
- (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
106
- /* Zero-extend the address for use in the final address. */
107
- tcg_out_ext32u(s, TCG_REG_R4, addrlo);
108
- addrlo = TCG_REG_R4;
109
- } else if (a_bits == 0) {
110
- tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
111
- } else {
112
- tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
113
- 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
114
- tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
115
- }
116
- }
117
-
118
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
119
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
120
- 0, 7, TCG_TYPE_I32);
121
- tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
122
- tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
123
- } else {
124
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
125
- 0, 7, TCG_TYPE_TL);
126
- }
127
-
128
- return addrlo;
129
-}
130
-
131
-/* Record the context of a call to the out of line helper code for the slow
132
- path for a load or store, so that we can later generate the correct
133
- helper code. */
134
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
135
- TCGType type, MemOpIdx oi,
136
- TCGReg datalo_reg, TCGReg datahi_reg,
137
- TCGReg addrlo_reg, TCGReg addrhi_reg,
138
- tcg_insn_unit *raddr, tcg_insn_unit *lptr)
139
-{
140
- TCGLabelQemuLdst *label = new_ldst_label(s);
141
-
142
- label->is_ld = is_ld;
143
- label->type = type;
144
- label->oi = oi;
145
- label->datalo_reg = datalo_reg;
146
- label->datahi_reg = datahi_reg;
147
- label->addrlo_reg = addrlo_reg;
148
- label->addrhi_reg = addrhi_reg;
149
- label->raddr = tcg_splitwx_to_rx(raddr);
150
- label->label_ptr[0] = lptr;
151
-}
152
-
153
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
154
{
155
MemOpIdx oi = lb->oi;
156
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
157
return true;
158
}
159
#else
160
-
161
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
162
- TCGReg addrhi, unsigned a_bits)
163
-{
164
- unsigned a_mask = (1 << a_bits) - 1;
165
- TCGLabelQemuLdst *label = new_ldst_label(s);
166
-
167
- label->is_ld = is_ld;
168
- label->addrlo_reg = addrlo;
169
- label->addrhi_reg = addrhi;
170
-
171
- /* We are expecting a_bits to max out at 7, much lower than ANDI. */
172
- tcg_debug_assert(a_bits < 16);
173
- tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, a_mask));
174
-
175
- label->label_ptr[0] = s->code_ptr;
176
- tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
177
-
178
- label->raddr = tcg_splitwx_to_rx(s->code_ptr);
179
-}
180
-
181
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
182
{
183
if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
184
@@ -XXX,XX +XXX,XX @@ typedef struct {
185
TCGReg index;
186
} HostAddress;
187
188
+/*
189
+ * For softmmu, perform the TLB load and compare.
190
+ * For useronly, perform any required alignment tests.
191
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
192
+ * is required and fill in @h with the host address for the fast path.
193
+ */
194
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
195
+ TCGReg addrlo, TCGReg addrhi,
196
+ MemOpIdx oi, bool is_ld)
197
+{
198
+ TCGLabelQemuLdst *ldst = NULL;
199
+ MemOp opc = get_memop(oi);
200
+ unsigned a_bits = get_alignment_bits(opc);
201
+
202
+#ifdef CONFIG_SOFTMMU
203
+ int mem_index = get_mmuidx(oi);
204
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
205
+ : offsetof(CPUTLBEntry, addr_write);
206
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
207
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
208
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
209
+ unsigned s_bits = opc & MO_SIZE;
210
+
211
+ ldst = new_ldst_label(s);
212
+ ldst->is_ld = is_ld;
213
+ ldst->oi = oi;
214
+ ldst->addrlo_reg = addrlo;
215
+ ldst->addrhi_reg = addrhi;
216
+
217
+ /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
218
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
219
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
220
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
221
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
222
+
223
+ /* Extract the page index, shifted into place for tlb index. */
224
+ if (TCG_TARGET_REG_BITS == 32) {
225
+ tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
226
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
227
+ } else {
228
+ tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
229
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
230
+ }
231
+ tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
232
+
233
+ /* Load the TLB comparator. */
234
+ if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
235
+ uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
236
+ ? LWZUX : LDUX);
237
+ tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
238
+ } else {
239
+ tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
240
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
241
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
242
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
243
+ } else {
244
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
245
+ }
246
+ }
247
+
248
+ /*
249
+ * Load the TLB addend for use on the fast path.
250
+ * Do this asap to minimize any load use delay.
251
+ */
252
+ h->base = TCG_REG_R3;
253
+ tcg_out_ld(s, TCG_TYPE_PTR, h->base, TCG_REG_R3,
254
+ offsetof(CPUTLBEntry, addend));
255
+
256
+ /* Clear the non-page, non-alignment bits from the address */
257
+ if (TCG_TARGET_REG_BITS == 32) {
258
+ /*
259
+ * We don't support unaligned accesses on 32-bits.
260
+ * Preserve the bottom bits and thus trigger a comparison
261
+ * failure on unaligned accesses.
262
+ */
263
+ if (a_bits < s_bits) {
264
+ a_bits = s_bits;
265
+ }
266
+ tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
267
+ (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
268
+ } else {
269
+ TCGReg t = addrlo;
270
+
271
+ /*
272
+ * If the access is unaligned, we need to make sure we fail if we
273
+ * cross a page boundary. The trick is to add the access size-1
274
+ * to the address before masking the low bits. That will make the
275
+ * address overflow to the next page if we cross a page boundary,
276
+ * which will then force a mismatch of the TLB compare.
277
+ */
278
+ if (a_bits < s_bits) {
279
+ unsigned a_mask = (1 << a_bits) - 1;
280
+ unsigned s_mask = (1 << s_bits) - 1;
281
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
282
+ t = TCG_REG_R0;
283
+ }
284
+
285
+ /* Mask the address for the requested alignment. */
286
+ if (TARGET_LONG_BITS == 32) {
287
+ tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
288
+ (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
289
+ /* Zero-extend the address for use in the final address. */
290
+ tcg_out_ext32u(s, TCG_REG_R4, addrlo);
291
+ addrlo = TCG_REG_R4;
292
+ } else if (a_bits == 0) {
293
+ tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
294
+ } else {
295
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
296
+ 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
297
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
298
+ }
299
+ }
300
+ h->index = addrlo;
301
+
302
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
303
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
304
+ 0, 7, TCG_TYPE_I32);
305
+ tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
306
+ tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
307
+ } else {
308
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
309
+ 0, 7, TCG_TYPE_TL);
310
+ }
311
+
312
+ /* Load a pointer into the current opcode w/conditional branch-link. */
313
+ ldst->label_ptr[0] = s->code_ptr;
314
+ tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
315
+#else
316
+ if (a_bits) {
317
+ ldst = new_ldst_label(s);
318
+ ldst->is_ld = is_ld;
319
+ ldst->oi = oi;
320
+ ldst->addrlo_reg = addrlo;
321
+ ldst->addrhi_reg = addrhi;
322
+
323
+ /* We are expecting a_bits to max out at 7, much lower than ANDI. */
324
+ tcg_debug_assert(a_bits < 16);
325
+ tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1));
326
+
327
+ ldst->label_ptr[0] = s->code_ptr;
328
+ tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
329
+ }
330
+
331
+ h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
332
+ h->index = addrlo;
333
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
334
+ tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
335
+ h->index = TCG_REG_TMP1;
336
+ }
337
+#endif
338
+
339
+ return ldst;
340
+}
341
+
342
static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
343
TCGReg addrlo, TCGReg addrhi,
344
MemOpIdx oi, TCGType data_type)
345
{
346
MemOp opc = get_memop(oi);
347
- MemOp s_bits = opc & MO_SIZE;
348
+ TCGLabelQemuLdst *ldst;
349
HostAddress h;
350
351
-#ifdef CONFIG_SOFTMMU
352
- tcg_insn_unit *label_ptr;
353
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
354
355
- h.index = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), true);
356
- h.base = TCG_REG_R3;
357
-
358
- /* Load a pointer into the current opcode w/conditional branch-link. */
359
- label_ptr = s->code_ptr;
360
- tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
361
-#else /* !CONFIG_SOFTMMU */
362
- unsigned a_bits = get_alignment_bits(opc);
363
- if (a_bits) {
364
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
365
- }
366
- h.base = guest_base ? TCG_GUEST_BASE_REG : 0;
367
- h.index = addrlo;
368
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
369
- tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
370
- h.index = TCG_REG_TMP1;
371
- }
372
-#endif
373
-
374
- if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
375
+ if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
376
if (opc & MO_BSWAP) {
377
tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
378
tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
379
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
380
}
381
}
382
383
-#ifdef CONFIG_SOFTMMU
384
- add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi,
385
- addrlo, addrhi, s->code_ptr, label_ptr);
386
-#endif
387
+ if (ldst) {
388
+ ldst->type = data_type;
389
+ ldst->datalo_reg = datalo;
390
+ ldst->datahi_reg = datahi;
391
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
392
+ }
393
}
394
395
static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
396
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
397
MemOpIdx oi, TCGType data_type)
398
{
399
MemOp opc = get_memop(oi);
400
- MemOp s_bits = opc & MO_SIZE;
401
+ TCGLabelQemuLdst *ldst;
402
HostAddress h;
403
404
-#ifdef CONFIG_SOFTMMU
405
- tcg_insn_unit *label_ptr;
406
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
407
408
- h.index = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), false);
409
- h.base = TCG_REG_R3;
410
-
411
- /* Load a pointer into the current opcode w/conditional branch-link. */
412
- label_ptr = s->code_ptr;
413
- tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
414
-#else /* !CONFIG_SOFTMMU */
415
- unsigned a_bits = get_alignment_bits(opc);
416
- if (a_bits) {
417
- tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
418
- }
419
- h.base = guest_base ? TCG_GUEST_BASE_REG : 0;
420
- h.index = addrlo;
421
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
422
- tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
423
- h.index = TCG_REG_TMP1;
424
- }
425
-#endif
426
-
427
- if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
428
+ if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
429
if (opc & MO_BSWAP) {
430
tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
431
tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
432
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
433
}
434
}
435
436
-#ifdef CONFIG_SOFTMMU
437
- add_qemu_ldst_label(s, false, data_type, oi, datalo, datahi,
438
- addrlo, addrhi, s->code_ptr, label_ptr);
439
-#endif
440
+ if (ldst) {
441
+ ldst->type = data_type;
442
+ ldst->datalo_reg = datalo;
443
+ ldst->datahi_reg = datahi;
444
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
445
+ }
446
}
447
448
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
449
--
450
2.34.1
451
452
New patch
1
Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment,
2
and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st
3
into one function that returns TCGReg and TCGLabelQemuLdst.
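
Unlike the HostAddress-based conversions, riscv loads and stores take a single
base register (there is no reg+reg addressing mode), so the host address is
handed back through an out-parameter instead; condensed from the diff below:

    static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
                                               TCGReg addr_reg, MemOpIdx oi,
                                               bool is_ld);

    /* caller, e.g. tcg_out_qemu_ld: */
    ldst = prepare_host_addr(s, &base, addr_reg, oi, true);
    tcg_out_qemu_ld_direct(s, data_reg, base, get_memop(oi), data_type);
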
1
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/riscv/tcg-target.c.inc | 253 +++++++++++++++++--------------------
9
1 file changed, 114 insertions(+), 139 deletions(-)
10
11
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/riscv/tcg-target.c.inc
14
+++ b/tcg/riscv/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[MO_SIZE + 1] = {
16
#endif
17
};
18
19
-/* We expect to use a 12-bit negative offset from ENV. */
20
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
21
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
22
-
23
static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
24
{
25
tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
26
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
27
tcg_debug_assert(ok);
28
}
29
30
-static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, MemOpIdx oi,
31
- tcg_insn_unit **label_ptr, bool is_load)
32
-{
33
- MemOp opc = get_memop(oi);
34
- unsigned s_bits = opc & MO_SIZE;
35
- unsigned a_bits = get_alignment_bits(opc);
36
- tcg_target_long compare_mask;
37
- int mem_index = get_mmuidx(oi);
38
- int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
39
- int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
40
- int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
41
- TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
42
-
43
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs);
44
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs);
45
-
46
- tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr,
47
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
48
- tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
49
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
50
-
51
- /* Load the tlb comparator and the addend. */
52
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
53
- is_load ? offsetof(CPUTLBEntry, addr_read)
54
- : offsetof(CPUTLBEntry, addr_write));
55
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
56
- offsetof(CPUTLBEntry, addend));
57
-
58
- /* We don't support unaligned accesses. */
59
- if (a_bits < s_bits) {
60
- a_bits = s_bits;
61
- }
62
- /* Clear the non-page, non-alignment bits from the address. */
63
- compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
64
- if (compare_mask == sextreg(compare_mask, 0, 12)) {
65
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr, compare_mask);
66
- } else {
67
- tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
68
- tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr);
69
- }
70
-
71
- /* Compare masked address with the TLB entry. */
72
- label_ptr[0] = s->code_ptr;
73
- tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
74
-
75
- /* TLB Hit - translate address using addend. */
76
- if (TARGET_LONG_BITS == 32) {
77
- tcg_out_ext32u(s, TCG_REG_TMP0, addr);
78
- addr = TCG_REG_TMP0;
79
- }
80
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr);
81
- return TCG_REG_TMP0;
82
-}
83
-
84
-static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
85
- TCGType data_type, TCGReg data_reg,
86
- TCGReg addr_reg, void *raddr,
87
- tcg_insn_unit **label_ptr)
88
-{
89
- TCGLabelQemuLdst *label = new_ldst_label(s);
90
-
91
- label->is_ld = is_ld;
92
- label->oi = oi;
93
- label->type = data_type;
94
- label->datalo_reg = data_reg;
95
- label->addrlo_reg = addr_reg;
96
- label->raddr = tcg_splitwx_to_rx(raddr);
97
- label->label_ptr[0] = label_ptr[0];
98
-}
99
-
100
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
101
{
102
MemOpIdx oi = l->oi;
103
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
104
return true;
105
}
106
#else
107
-
108
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
109
- unsigned a_bits)
110
-{
111
- unsigned a_mask = (1 << a_bits) - 1;
112
- TCGLabelQemuLdst *l = new_ldst_label(s);
113
-
114
- l->is_ld = is_ld;
115
- l->addrlo_reg = addr_reg;
116
-
117
- /* We are expecting a_bits to max out at 7, so we can always use andi. */
118
- tcg_debug_assert(a_bits < 12);
119
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
120
-
121
- l->label_ptr[0] = s->code_ptr;
122
- tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
123
-
124
- l->raddr = tcg_splitwx_to_rx(s->code_ptr);
125
-}
126
-
127
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
128
{
129
/* resolve label address */
130
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
131
{
132
return tcg_out_fail_alignment(s, l);
133
}
134
-
135
#endif /* CONFIG_SOFTMMU */
136
137
+/*
138
+ * For softmmu, perform the TLB load and compare.
139
+ * For useronly, perform any required alignment tests.
140
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
141
+ * is required and fill in @h with the host address for the fast path.
142
+ */
143
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
144
+ TCGReg addr_reg, MemOpIdx oi,
145
+ bool is_ld)
146
+{
147
+ TCGLabelQemuLdst *ldst = NULL;
148
+ MemOp opc = get_memop(oi);
149
+ unsigned a_bits = get_alignment_bits(opc);
150
+ unsigned a_mask = (1u << a_bits) - 1;
151
+
152
+#ifdef CONFIG_SOFTMMU
153
+ unsigned s_bits = opc & MO_SIZE;
154
+ int mem_index = get_mmuidx(oi);
155
+ int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
156
+ int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
157
+ int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
158
+ TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
159
+ tcg_target_long compare_mask;
160
+
161
+ ldst = new_ldst_label(s);
162
+ ldst->is_ld = is_ld;
163
+ ldst->oi = oi;
164
+ ldst->addrlo_reg = addr_reg;
165
+
166
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
167
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
168
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs);
169
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs);
170
+
171
+ tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
172
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
173
+ tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
174
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
175
+
176
+ /* Load the tlb comparator and the addend. */
177
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
178
+ is_ld ? offsetof(CPUTLBEntry, addr_read)
179
+ : offsetof(CPUTLBEntry, addr_write));
180
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
181
+ offsetof(CPUTLBEntry, addend));
182
+
183
+ /* We don't support unaligned accesses. */
184
+ if (a_bits < s_bits) {
185
+ a_bits = s_bits;
186
+ }
187
+ /* Clear the non-page, non-alignment bits from the address. */
188
+ compare_mask = (tcg_target_long)TARGET_PAGE_MASK | a_mask;
189
+ if (compare_mask == sextreg(compare_mask, 0, 12)) {
190
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, compare_mask);
191
+ } else {
192
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
193
+ tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
194
+ }
195
+
196
+ /* Compare masked address with the TLB entry. */
197
+ ldst->label_ptr[0] = s->code_ptr;
198
+ tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
199
+
200
+ /* TLB Hit - translate address using addend. */
201
+ if (TARGET_LONG_BITS == 32) {
202
+ tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
203
+ addr_reg = TCG_REG_TMP0;
204
+ }
205
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr_reg);
206
+ *pbase = TCG_REG_TMP0;
207
+#else
208
+ if (a_mask) {
209
+ ldst = new_ldst_label(s);
210
+ ldst->is_ld = is_ld;
211
+ ldst->oi = oi;
212
+ ldst->addrlo_reg = addr_reg;
213
+
214
+ /* We are expecting a_bits max 7, so we can always use andi. */
215
+ tcg_debug_assert(a_bits < 12);
216
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
217
+
218
+ ldst->label_ptr[0] = s->code_ptr;
219
+ tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
220
+ }
221
+
222
+ TCGReg base = addr_reg;
223
+ if (TARGET_LONG_BITS == 32) {
224
+ tcg_out_ext32u(s, TCG_REG_TMP0, base);
225
+ base = TCG_REG_TMP0;
226
+ }
227
+ if (guest_base != 0) {
228
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
229
+ base = TCG_REG_TMP0;
230
+ }
231
+ *pbase = base;
232
+#endif
233
+
234
+ return ldst;
235
+}
236
+
237
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
238
TCGReg base, MemOp opc, TCGType type)
239
{
240
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
241
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
242
MemOpIdx oi, TCGType data_type)
243
{
244
- MemOp opc = get_memop(oi);
245
+ TCGLabelQemuLdst *ldst;
246
TCGReg base;
247
248
-#if defined(CONFIG_SOFTMMU)
249
- tcg_insn_unit *label_ptr[1];
250
+ ldst = prepare_host_addr(s, &base, addr_reg, oi, true);
251
+ tcg_out_qemu_ld_direct(s, data_reg, base, get_memop(oi), data_type);
252
253
- base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1);
254
- tcg_out_qemu_ld_direct(s, data_reg, base, opc, data_type);
255
- add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
256
- s->code_ptr, label_ptr);
257
-#else
258
- unsigned a_bits = get_alignment_bits(opc);
259
- if (a_bits) {
260
- tcg_out_test_alignment(s, true, addr_reg, a_bits);
261
+ if (ldst) {
262
+ ldst->type = data_type;
263
+ ldst->datalo_reg = data_reg;
264
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
265
}
266
- base = addr_reg;
267
- if (TARGET_LONG_BITS == 32) {
268
- tcg_out_ext32u(s, TCG_REG_TMP0, base);
269
- base = TCG_REG_TMP0;
270
- }
271
- if (guest_base != 0) {
272
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
273
- base = TCG_REG_TMP0;
274
- }
275
- tcg_out_qemu_ld_direct(s, data_reg, base, opc, data_type);
276
-#endif
277
}
278
279
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val,
280
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val,
281
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
282
MemOpIdx oi, TCGType data_type)
283
{
284
- MemOp opc = get_memop(oi);
285
+ TCGLabelQemuLdst *ldst;
286
TCGReg base;
287
288
-#if defined(CONFIG_SOFTMMU)
289
- tcg_insn_unit *label_ptr[1];
290
+ ldst = prepare_host_addr(s, &base, addr_reg, oi, false);
291
+ tcg_out_qemu_st_direct(s, data_reg, base, get_memop(oi));
292
293
- base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0);
294
- tcg_out_qemu_st_direct(s, data_reg, base, opc);
295
- add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
296
- s->code_ptr, label_ptr);
297
-#else
298
- unsigned a_bits = get_alignment_bits(opc);
299
- if (a_bits) {
300
- tcg_out_test_alignment(s, false, addr_reg, a_bits);
301
+ if (ldst) {
302
+ ldst->type = data_type;
303
+ ldst->datalo_reg = data_reg;
304
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
305
}
306
- base = addr_reg;
307
- if (TARGET_LONG_BITS == 32) {
308
- tcg_out_ext32u(s, TCG_REG_TMP0, base);
309
- base = TCG_REG_TMP0;
310
- }
311
- if (guest_base != 0) {
312
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
313
- base = TCG_REG_TMP0;
314
- }
315
- tcg_out_qemu_st_direct(s, data_reg, base, opc);
316
-#endif
317
}
318
319
static const tcg_insn_unit *tb_ret_addr;
320
--
321
2.34.1
322
323
New patch
1
Merge tcg_out_tlb_read, add_qemu_ldst_label, tcg_out_test_alignment,
2
tcg_prepare_user_ldst, and some code that lived in both tcg_out_qemu_ld
3
and tcg_out_qemu_st into one function that returns HostAddress and
4
TCGLabelQemuLdst structures.
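
The s390x HostAddress additionally carries a displacement, which lets the
user-only path fold a small guest_base straight into the addressing mode;
condensed from the diff below:

    if (guest_base < 0x80000) {
        h->index = TCG_REG_NONE;
        h->disp = guest_base;     /* fits the signed 20-bit displacement */
    } else {
        h->index = TCG_GUEST_BASE_REG;
        h->disp = 0;
    }
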
1
5
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/s390x/tcg-target.c.inc | 263 ++++++++++++++++---------------------
10
1 file changed, 113 insertions(+), 150 deletions(-)
11
12
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/s390x/tcg-target.c.inc
15
+++ b/tcg/s390x/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
17
}
18
19
#if defined(CONFIG_SOFTMMU)
20
-/* We're expecting to use a 20-bit negative offset on the tlb memory ops. */
21
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
22
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
23
-
24
-/* Load and compare a TLB entry, leaving the flags set. Loads the TLB
25
- addend into R2. Returns a register with the santitized guest address. */
26
-static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
27
- int mem_index, bool is_ld)
28
-{
29
- unsigned s_bits = opc & MO_SIZE;
30
- unsigned a_bits = get_alignment_bits(opc);
31
- unsigned s_mask = (1 << s_bits) - 1;
32
- unsigned a_mask = (1 << a_bits) - 1;
33
- int fast_off = TLB_MASK_TABLE_OFS(mem_index);
34
- int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
35
- int table_off = fast_off + offsetof(CPUTLBDescFast, table);
36
- int ofs, a_off;
37
- uint64_t tlb_mask;
38
-
39
- tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
40
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
41
- tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
42
- tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
43
-
44
- /* For aligned accesses, we check the first byte and include the alignment
45
- bits within the address. For unaligned access, we check that we don't
46
- cross pages using the address of the last byte of the access. */
47
- a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
48
- tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
49
- if (a_off == 0) {
50
- tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
51
- } else {
52
- tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
53
- tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
54
- }
55
-
56
- if (is_ld) {
57
- ofs = offsetof(CPUTLBEntry, addr_read);
58
- } else {
59
- ofs = offsetof(CPUTLBEntry, addr_write);
60
- }
61
- if (TARGET_LONG_BITS == 32) {
62
- tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
63
- } else {
64
- tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
65
- }
66
-
67
- tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
68
- offsetof(CPUTLBEntry, addend));
69
-
70
- if (TARGET_LONG_BITS == 32) {
71
- tcg_out_ext32u(s, TCG_REG_R3, addr_reg);
72
- return TCG_REG_R3;
73
- }
74
- return addr_reg;
75
-}
76
-
77
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
78
- TCGType type, TCGReg data, TCGReg addr,
79
- tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
80
-{
81
- TCGLabelQemuLdst *label = new_ldst_label(s);
82
-
83
- label->is_ld = is_ld;
84
- label->oi = oi;
85
- label->type = type;
86
- label->datalo_reg = data;
87
- label->addrlo_reg = addr;
88
- label->raddr = tcg_splitwx_to_rx(raddr);
89
- label->label_ptr[0] = label_ptr;
90
-}
91
-
92
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
93
{
94
TCGReg addr_reg = lb->addrlo_reg;
95
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
96
return true;
97
}
98
#else
99
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld,
100
- TCGReg addrlo, unsigned a_bits)
101
-{
102
- unsigned a_mask = (1 << a_bits) - 1;
103
- TCGLabelQemuLdst *l = new_ldst_label(s);
104
-
105
- l->is_ld = is_ld;
106
- l->addrlo_reg = addrlo;
107
-
108
- /* We are expecting a_bits to max out at 7, much lower than TMLL. */
109
- tcg_debug_assert(a_bits < 16);
110
- tcg_out_insn(s, RI, TMLL, addrlo, a_mask);
111
-
112
- tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
113
- l->label_ptr[0] = s->code_ptr;
114
- s->code_ptr += 1;
115
-
116
- l->raddr = tcg_splitwx_to_rx(s->code_ptr);
117
-}
118
-
119
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
120
{
121
if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
122
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
123
{
124
return tcg_out_fail_alignment(s, l);
125
}
126
+#endif /* CONFIG_SOFTMMU */
127
128
-static HostAddress tcg_prepare_user_ldst(TCGContext *s, TCGReg addr_reg)
129
+/*
130
+ * For softmmu, perform the TLB load and compare.
131
+ * For useronly, perform any required alignment tests.
132
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
133
+ * is required and fill in @h with the host address for the fast path.
134
+ */
135
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
136
+ TCGReg addr_reg, MemOpIdx oi,
137
+ bool is_ld)
138
{
139
- TCGReg index;
140
- int disp;
141
+ TCGLabelQemuLdst *ldst = NULL;
142
+ MemOp opc = get_memop(oi);
143
+ unsigned a_bits = get_alignment_bits(opc);
144
+ unsigned a_mask = (1u << a_bits) - 1;
145
146
+#ifdef CONFIG_SOFTMMU
147
+ unsigned s_bits = opc & MO_SIZE;
148
+ unsigned s_mask = (1 << s_bits) - 1;
149
+ int mem_index = get_mmuidx(oi);
150
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
151
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
152
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
153
+ int ofs, a_off;
154
+ uint64_t tlb_mask;
155
+
156
+ ldst = new_ldst_label(s);
157
+ ldst->is_ld = is_ld;
158
+ ldst->oi = oi;
159
+ ldst->addrlo_reg = addr_reg;
160
+
161
+ tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
162
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
163
+
164
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
165
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
166
+ tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
167
+ tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
168
+
169
+ /*
170
+ * For aligned accesses, we check the first byte and include the alignment
171
+ * bits within the address. For unaligned access, we check that we don't
172
+ * cross pages using the address of the last byte of the access.
173
+ */
174
+ a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
175
+ tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
176
+ if (a_off == 0) {
177
+ tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
178
+ } else {
179
+ tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
180
+ tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
181
+ }
182
+
183
+ if (is_ld) {
184
+ ofs = offsetof(CPUTLBEntry, addr_read);
185
+ } else {
186
+ ofs = offsetof(CPUTLBEntry, addr_write);
187
+ }
188
+ if (TARGET_LONG_BITS == 32) {
189
+ tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
190
+ } else {
191
+ tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
192
+ }
193
+
194
+ tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
195
+ ldst->label_ptr[0] = s->code_ptr++;
196
+
197
+ h->index = TCG_REG_R2;
198
+ tcg_out_insn(s, RXY, LG, h->index, TCG_REG_R2, TCG_REG_NONE,
199
+ offsetof(CPUTLBEntry, addend));
200
+
201
+ h->base = addr_reg;
202
+ if (TARGET_LONG_BITS == 32) {
203
+ tcg_out_ext32u(s, TCG_REG_R3, addr_reg);
204
+ h->base = TCG_REG_R3;
205
+ }
206
+ h->disp = 0;
207
+#else
208
+ if (a_mask) {
209
+ ldst = new_ldst_label(s);
210
+ ldst->is_ld = is_ld;
211
+ ldst->oi = oi;
212
+ ldst->addrlo_reg = addr_reg;
213
+
214
+ /* We are expecting a_bits to max out at 7, much lower than TMLL. */
215
+ tcg_debug_assert(a_bits < 16);
216
+ tcg_out_insn(s, RI, TMLL, addr_reg, a_mask);
217
+
218
+ tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
219
+ ldst->label_ptr[0] = s->code_ptr++;
220
+ }
221
+
222
+ h->base = addr_reg;
223
if (TARGET_LONG_BITS == 32) {
224
tcg_out_ext32u(s, TCG_TMP0, addr_reg);
225
- addr_reg = TCG_TMP0;
226
+ h->base = TCG_TMP0;
227
}
228
if (guest_base < 0x80000) {
229
- index = TCG_REG_NONE;
230
- disp = guest_base;
231
+ h->index = TCG_REG_NONE;
232
+ h->disp = guest_base;
233
} else {
234
- index = TCG_GUEST_BASE_REG;
235
- disp = 0;
236
+ h->index = TCG_GUEST_BASE_REG;
237
+ h->disp = 0;
238
}
239
- return (HostAddress){ .base = addr_reg, .index = index, .disp = disp };
240
+#endif
241
+
242
+ return ldst;
243
}
244
-#endif /* CONFIG_SOFTMMU */
245
246
static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
247
MemOpIdx oi, TCGType data_type)
248
{
249
- MemOp opc = get_memop(oi);
250
+ TCGLabelQemuLdst *ldst;
251
HostAddress h;
252
253
-#ifdef CONFIG_SOFTMMU
254
- unsigned mem_index = get_mmuidx(oi);
255
- tcg_insn_unit *label_ptr;
256
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
257
+ tcg_out_qemu_ld_direct(s, get_memop(oi), data_reg, h);
258
259
- h.base = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
260
- h.index = TCG_REG_R2;
261
- h.disp = 0;
262
-
263
- tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
264
- label_ptr = s->code_ptr;
265
- s->code_ptr += 1;
266
-
267
- tcg_out_qemu_ld_direct(s, opc, data_reg, h);
268
-
269
- add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
270
- s->code_ptr, label_ptr);
271
-#else
272
- unsigned a_bits = get_alignment_bits(opc);
273
-
274
- if (a_bits) {
275
- tcg_out_test_alignment(s, true, addr_reg, a_bits);
276
+ if (ldst) {
277
+ ldst->type = data_type;
278
+ ldst->datalo_reg = data_reg;
279
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
280
}
281
- h = tcg_prepare_user_ldst(s, addr_reg);
282
- tcg_out_qemu_ld_direct(s, opc, data_reg, h);
283
-#endif
284
}
285
286
static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
287
MemOpIdx oi, TCGType data_type)
288
{
289
- MemOp opc = get_memop(oi);
290
+ TCGLabelQemuLdst *ldst;
291
HostAddress h;
292
293
-#ifdef CONFIG_SOFTMMU
294
- unsigned mem_index = get_mmuidx(oi);
295
- tcg_insn_unit *label_ptr;
296
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
297
+ tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
298
299
- h.base = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
300
- h.index = TCG_REG_R2;
301
- h.disp = 0;
302
-
303
- tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
304
- label_ptr = s->code_ptr;
305
- s->code_ptr += 1;
306
-
307
- tcg_out_qemu_st_direct(s, opc, data_reg, h);
308
-
309
- add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
310
- s->code_ptr, label_ptr);
311
-#else
312
- unsigned a_bits = get_alignment_bits(opc);
313
-
314
- if (a_bits) {
315
- tcg_out_test_alignment(s, false, addr_reg, a_bits);
316
+ if (ldst) {
317
+ ldst->type = data_type;
318
+ ldst->datalo_reg = data_reg;
319
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
320
}
321
- h = tcg_prepare_user_ldst(s, addr_reg);
322
- tcg_out_qemu_st_direct(s, opc, data_reg, h);
323
-#endif
324
}
325
326
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
327
--
328
2.34.1
329
330
New patch
1
Add tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
2
and tcg_out_st_helper_args. These and their subroutines
3
use the existing knowledge of the host function call ABI
4
to load the function call arguments and return results.
1
5
6
These will be used to simplify the backends in turn.
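
With these in place, a backend's slow path reduces to argument marshalling
around the actual helper call. Schematically (ldst_helper_param here stands
for the backend's TCGLdstHelperParam, as in the i386 conversion later in this
series):

    static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
    {
        /* ... resolve the label, patch the branch from the fast path ... */

        /* Load env/addr/oi/ra into argument registers or stack slots. */
        tcg_out_ld_helper_args(s, l, &ldst_helper_param);
        /* ... emit the call to the appropriate load helper ... */
        /* Move/extend the helper's return value into the data register(s). */
        tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);

        /* ... jump back to l->raddr ... */
        return true;
    }
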
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
tcg/tcg.c | 475 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
12
1 file changed, 471 insertions(+), 4 deletions(-)
13
14
diff --git a/tcg/tcg.c b/tcg/tcg.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/tcg.c
17
+++ b/tcg/tcg.c
18
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
19
static int tcg_out_ldst_finalize(TCGContext *s);
20
#endif
21
22
+typedef struct TCGLdstHelperParam {
23
+ TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
24
+ unsigned ntmp;
25
+ int tmp[3];
26
+} TCGLdstHelperParam;
27
+
28
+static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
29
+ const TCGLdstHelperParam *p)
30
+ __attribute__((unused));
31
+static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
32
+ bool load_sign, const TCGLdstHelperParam *p)
33
+ __attribute__((unused));
34
+static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
35
+ const TCGLdstHelperParam *p)
36
+ __attribute__((unused));
37
+
38
TCGContext tcg_init_ctx;
39
__thread TCGContext *tcg_ctx;
40
41
@@ -XXX,XX +XXX,XX @@ void tcg_raise_tb_overflow(TCGContext *s)
42
siglongjmp(s->jmp_trans, -2);
43
}
44
45
+/*
46
+ * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
47
+ * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
48
+ *
49
+ * However, tcg_out_helper_load_slots reuses this field to hold an
50
+ * argument slot number (which may designate an argument register or an
51
+ * argument stack slot), converting to TCGReg once all arguments that
52
+ * are destined for the stack are processed.
53
+ */
54
typedef struct TCGMovExtend {
55
- TCGReg dst;
56
+ unsigned dst;
57
TCGReg src;
58
TCGType dst_type;
59
TCGType src_type;
60
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
61
* between the sources and destinations.
62
*/
63
64
-static void __attribute__((unused))
65
-tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
66
- const TCGMovExtend *i2, int scratch)
67
+static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
68
+ const TCGMovExtend *i2, int scratch)
69
{
70
TCGReg src1 = i1->src;
71
TCGReg src2 = i2->src;
72
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo all_helpers[] = {
73
};
74
static GHashTable *helper_table;
75
76
+/*
77
+ * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
78
+ * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
79
+ * We only use these for layout in tcg_out_ld_helper_ret and
80
+ * tcg_out_st_helper_args, and share them between several of
81
+ * the helpers, with the end result that it's easier to build manually.
82
+ */
83
+
84
+#if TCG_TARGET_REG_BITS == 32
85
+# define dh_typecode_ttl dh_typecode_i32
86
+#else
87
+# define dh_typecode_ttl dh_typecode_i64
88
+#endif
89
+
90
+static TCGHelperInfo info_helper_ld32_mmu = {
91
+ .flags = TCG_CALL_NO_WG,
92
+ .typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */
93
+ | dh_typemask(env, 1)
94
+ | dh_typemask(tl, 2) /* target_ulong addr */
95
+ | dh_typemask(i32, 3) /* unsigned oi */
96
+ | dh_typemask(ptr, 4) /* uintptr_t ra */
97
+};
98
+
99
+static TCGHelperInfo info_helper_ld64_mmu = {
100
+ .flags = TCG_CALL_NO_WG,
101
+ .typemask = dh_typemask(i64, 0) /* return uint64_t */
102
+ | dh_typemask(env, 1)
103
+ | dh_typemask(tl, 2) /* target_ulong addr */
104
+ | dh_typemask(i32, 3) /* unsigned oi */
105
+ | dh_typemask(ptr, 4) /* uintptr_t ra */
106
+};
107
+
108
+static TCGHelperInfo info_helper_st32_mmu = {
109
+ .flags = TCG_CALL_NO_WG,
110
+ .typemask = dh_typemask(void, 0)
111
+ | dh_typemask(env, 1)
112
+ | dh_typemask(tl, 2) /* target_ulong addr */
113
+ | dh_typemask(i32, 3) /* uint32_t data */
114
+ | dh_typemask(i32, 4) /* unsigned oi */
115
+ | dh_typemask(ptr, 5) /* uintptr_t ra */
116
+};
117
+
118
+static TCGHelperInfo info_helper_st64_mmu = {
119
+ .flags = TCG_CALL_NO_WG,
120
+ .typemask = dh_typemask(void, 0)
121
+ | dh_typemask(env, 1)
122
+ | dh_typemask(tl, 2) /* target_ulong addr */
123
+ | dh_typemask(i64, 3) /* uint64_t data */
124
+ | dh_typemask(i32, 4) /* unsigned oi */
125
+ | dh_typemask(ptr, 5) /* uintptr_t ra */
126
+};
127
+
128
#ifdef CONFIG_TCG_INTERPRETER
129
static ffi_type *typecode_to_ffi(int argmask)
130
{
131
@@ -XXX,XX +XXX,XX @@ static void tcg_context_init(unsigned max_cpus)
132
(gpointer)&all_helpers[i]);
133
}
134
135
+ init_call_layout(&info_helper_ld32_mmu);
136
+ init_call_layout(&info_helper_ld64_mmu);
137
+ init_call_layout(&info_helper_st32_mmu);
138
+ init_call_layout(&info_helper_st64_mmu);
139
+
140
#ifdef CONFIG_TCG_INTERPRETER
141
init_ffi_layouts();
142
#endif
143
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
144
}
145
}
146
147
+/*
148
+ * Similarly for qemu_ld/st slow path helpers.
149
+ * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
150
+ * using only the provided backend tcg_out_* functions.
151
+ */
152
+
153
+static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
154
+{
155
+ int ofs = arg_slot_stk_ofs(slot);
156
+
157
+ /*
158
+ * Each stack slot is TCG_TARGET_LONG_BITS. If the host does not
159
+ * require extension to uint64_t, adjust the address for uint32_t.
160
+ */
161
+ if (HOST_BIG_ENDIAN &&
162
+ TCG_TARGET_REG_BITS == 64 &&
163
+ type == TCG_TYPE_I32) {
164
+ ofs += 4;
165
+ }
166
+ return ofs;
167
+}
168
+
169
+static void tcg_out_helper_load_regs(TCGContext *s,
170
+ unsigned nmov, TCGMovExtend *mov,
171
+ unsigned ntmp, const int *tmp)
172
+{
173
+ switch (nmov) {
174
+ default:
175
+ /* The backend must have provided enough temps for the worst case. */
176
+ tcg_debug_assert(ntmp + 1 >= nmov);
177
+
178
+ for (unsigned i = nmov - 1; i >= 2; --i) {
179
+ TCGReg dst = mov[i].dst;
180
+
181
+ for (unsigned j = 0; j < i; ++j) {
182
+ if (dst == mov[j].src) {
183
+ /*
184
+ * Conflict.
185
+ * Copy the source to a temporary, recurse for the
186
+ * remaining moves, perform the extension from our
187
+ * scratch on the way out.
188
+ */
189
+ TCGReg scratch = tmp[--ntmp];
190
+ tcg_out_mov(s, mov[i].src_type, scratch, mov[i].src);
191
+ mov[i].src = scratch;
192
+
193
+ tcg_out_helper_load_regs(s, i, mov, ntmp, tmp);
194
+ tcg_out_movext1(s, &mov[i]);
195
+ return;
196
+ }
197
+ }
198
+
199
+ /* No conflicts: perform this move and continue. */
200
+ tcg_out_movext1(s, &mov[i]);
201
+ }
202
+ /* fall through for the final two moves */
203
+
204
+ case 2:
205
+ tcg_out_movext2(s, mov, mov + 1, ntmp ? tmp[0] : -1);
206
+ return;
207
+ case 1:
208
+ tcg_out_movext1(s, mov);
209
+ return;
210
+ case 0:
211
+ g_assert_not_reached();
212
+ }
213
+}
214
+
215
+static void tcg_out_helper_load_slots(TCGContext *s,
216
+ unsigned nmov, TCGMovExtend *mov,
217
+ const TCGLdstHelperParam *parm)
218
+{
219
+ unsigned i;
220
+
221
+ /*
222
+ * Start from the end, storing to the stack first.
223
+ * This frees those registers, so we need not consider overlap.
224
+ */
225
+ for (i = nmov; i-- > 0; ) {
226
+ unsigned slot = mov[i].dst;
227
+
228
+ if (arg_slot_reg_p(slot)) {
229
+ goto found_reg;
230
+ }
231
+
232
+ TCGReg src = mov[i].src;
233
+ TCGType dst_type = mov[i].dst_type;
234
+ MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
235
+
236
+ /* The argument is going onto the stack; extend into scratch. */
237
+ if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
238
+ tcg_debug_assert(parm->ntmp != 0);
239
+ mov[i].dst = src = parm->tmp[0];
240
+ tcg_out_movext1(s, &mov[i]);
241
+ }
242
+
243
+ tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
244
+ tcg_out_helper_stk_ofs(dst_type, slot));
245
+ }
246
+ return;
247
+
248
+ found_reg:
249
+ /*
250
+ * The remaining arguments are in registers.
251
+ * Convert slot numbers to argument registers.
252
+ */
253
+ nmov = i + 1;
254
+ for (i = 0; i < nmov; ++i) {
255
+ mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
256
+ }
257
+ tcg_out_helper_load_regs(s, nmov, mov, parm->ntmp, parm->tmp);
258
+}
259
+
260
+static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
261
+ TCGType type, tcg_target_long imm,
262
+ const TCGLdstHelperParam *parm)
263
+{
264
+ if (arg_slot_reg_p(slot)) {
265
+ tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
266
+ } else {
267
+ int ofs = tcg_out_helper_stk_ofs(type, slot);
268
+ if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
269
+ tcg_debug_assert(parm->ntmp != 0);
270
+ tcg_out_movi(s, type, parm->tmp[0], imm);
271
+ tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
272
+ }
273
+ }
274
+}
275
+
276
+static void tcg_out_helper_load_common_args(TCGContext *s,
277
+ const TCGLabelQemuLdst *ldst,
278
+ const TCGLdstHelperParam *parm,
279
+ const TCGHelperInfo *info,
280
+ unsigned next_arg)
281
+{
282
+ TCGMovExtend ptr_mov = {
283
+ .dst_type = TCG_TYPE_PTR,
284
+ .src_type = TCG_TYPE_PTR,
285
+ .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
286
+ };
287
+ const TCGCallArgumentLoc *loc = &info->in[0];
288
+ TCGType type;
289
+ unsigned slot;
290
+ tcg_target_ulong imm;
291
+
292
+ /*
293
+ * Handle env, which is always first.
294
+ */
295
+ ptr_mov.dst = loc->arg_slot;
296
+ ptr_mov.src = TCG_AREG0;
297
+ tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
298
+
299
+ /*
300
+ * Handle oi.
301
+ */
302
+ imm = ldst->oi;
303
+ loc = &info->in[next_arg];
304
+ type = TCG_TYPE_I32;
305
+ switch (loc->kind) {
306
+ case TCG_CALL_ARG_NORMAL:
307
+ break;
308
+ case TCG_CALL_ARG_EXTEND_U:
309
+ case TCG_CALL_ARG_EXTEND_S:
310
+ /* No extension required for MemOpIdx. */
311
+ tcg_debug_assert(imm <= INT32_MAX);
312
+ type = TCG_TYPE_REG;
313
+ break;
314
+ default:
315
+ g_assert_not_reached();
316
+ }
317
+ tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
318
+ next_arg++;
319
+
320
+ /*
321
+ * Handle ra.
322
+ */
323
+ loc = &info->in[next_arg];
324
+ slot = loc->arg_slot;
325
+ if (parm->ra_gen) {
326
+ int arg_reg = -1;
327
+ TCGReg ra_reg;
328
+
329
+ if (arg_slot_reg_p(slot)) {
330
+ arg_reg = tcg_target_call_iarg_regs[slot];
331
+ }
332
+ ra_reg = parm->ra_gen(s, ldst, arg_reg);
333
+
334
+ ptr_mov.dst = slot;
335
+ ptr_mov.src = ra_reg;
336
+ tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
337
+ } else {
338
+ imm = (uintptr_t)ldst->raddr;
339
+ tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
340
+ }
341
+}
342
+
343
+static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
344
+ const TCGCallArgumentLoc *loc,
345
+ TCGType dst_type, TCGType src_type,
346
+ TCGReg lo, TCGReg hi)
347
+{
348
+ if (dst_type <= TCG_TYPE_REG) {
349
+ MemOp src_ext;
350
+
351
+ switch (loc->kind) {
352
+ case TCG_CALL_ARG_NORMAL:
353
+ src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
354
+ break;
355
+ case TCG_CALL_ARG_EXTEND_U:
356
+ dst_type = TCG_TYPE_REG;
357
+ src_ext = MO_UL;
358
+ break;
359
+ case TCG_CALL_ARG_EXTEND_S:
360
+ dst_type = TCG_TYPE_REG;
361
+ src_ext = MO_SL;
362
+ break;
363
+ default:
364
+ g_assert_not_reached();
365
+ }
366
+
367
+ mov[0].dst = loc->arg_slot;
368
+ mov[0].dst_type = dst_type;
369
+ mov[0].src = lo;
370
+ mov[0].src_type = src_type;
371
+ mov[0].src_ext = src_ext;
372
+ return 1;
373
+ }
374
+
375
+ assert(TCG_TARGET_REG_BITS == 32);
376
+
377
+ mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
378
+ mov[0].src = lo;
379
+ mov[0].dst_type = TCG_TYPE_I32;
380
+ mov[0].src_type = TCG_TYPE_I32;
381
+ mov[0].src_ext = MO_32;
382
+
383
+ mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
384
+ mov[1].src = hi;
385
+ mov[1].dst_type = TCG_TYPE_I32;
386
+ mov[1].src_type = TCG_TYPE_I32;
387
+ mov[1].src_ext = MO_32;
388
+
389
+ return 2;
390
+}
391
+
392
+static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
393
+ const TCGLdstHelperParam *parm)
394
+{
395
+ const TCGHelperInfo *info;
396
+ const TCGCallArgumentLoc *loc;
397
+ TCGMovExtend mov[2];
398
+ unsigned next_arg, nmov;
399
+ MemOp mop = get_memop(ldst->oi);
400
+
401
+ switch (mop & MO_SIZE) {
402
+ case MO_8:
403
+ case MO_16:
404
+ case MO_32:
405
+ info = &info_helper_ld32_mmu;
406
+ break;
407
+ case MO_64:
408
+ info = &info_helper_ld64_mmu;
409
+ break;
410
+ default:
411
+ g_assert_not_reached();
412
+ }
413
+
414
+ /* Defer env argument. */
415
+ next_arg = 1;
416
+
417
+ loc = &info->in[next_arg];
418
+ nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
419
+ ldst->addrlo_reg, ldst->addrhi_reg);
420
+ next_arg += nmov;
421
+
422
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
423
+
424
+ /* No special attention for 32 and 64-bit return values. */
425
+ tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL);
426
+
427
+ tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
428
+}
429
+
430
+static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
431
+ bool load_sign,
432
+ const TCGLdstHelperParam *parm)
433
+{
434
+ TCGMovExtend mov[2];
435
+
436
+ if (ldst->type <= TCG_TYPE_REG) {
437
+ MemOp mop = get_memop(ldst->oi);
438
+
439
+ mov[0].dst = ldst->datalo_reg;
440
+ mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
441
+ mov[0].dst_type = ldst->type;
442
+ mov[0].src_type = TCG_TYPE_REG;
443
+
444
+ /*
445
+ * If load_sign, then we allowed the helper to perform the
446
+ * appropriate sign extension to tcg_target_ulong, and all
447
+ * we need now is a plain move.
448
+ *
449
+ * If they do not, then we expect the relevant extension
450
+ * instruction to be no more expensive than a move, and
451
+ * we thus save the icache etc by only using one of two
452
+ * helper functions.
453
+ */
454
+ if (load_sign || !(mop & MO_SIGN)) {
455
+ if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
456
+ mov[0].src_ext = MO_32;
457
+ } else {
458
+ mov[0].src_ext = MO_64;
459
+ }
460
+ } else {
461
+ mov[0].src_ext = mop & MO_SSIZE;
462
+ }
463
+ tcg_out_movext1(s, mov);
464
+ } else {
465
+ assert(TCG_TARGET_REG_BITS == 32);
466
+
467
+ mov[0].dst = ldst->datalo_reg;
468
+ mov[0].src =
469
+ tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
470
+ mov[0].dst_type = TCG_TYPE_I32;
471
+ mov[0].src_type = TCG_TYPE_I32;
472
+ mov[0].src_ext = MO_32;
473
+
474
+ mov[1].dst = ldst->datahi_reg;
475
+ mov[1].src =
476
+ tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
477
+ mov[1].dst_type = TCG_TYPE_REG;
478
+ mov[1].src_type = TCG_TYPE_REG;
479
+ mov[1].src_ext = MO_32;
480
+
481
+ tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
482
+ }
483
+}
484
+
485
+static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
486
+ const TCGLdstHelperParam *parm)
487
+{
488
+ const TCGHelperInfo *info;
489
+ const TCGCallArgumentLoc *loc;
490
+ TCGMovExtend mov[4];
491
+ TCGType data_type;
492
+ unsigned next_arg, nmov, n;
493
+ MemOp mop = get_memop(ldst->oi);
494
+
495
+ switch (mop & MO_SIZE) {
496
+ case MO_8:
497
+ case MO_16:
498
+ case MO_32:
499
+ info = &info_helper_st32_mmu;
500
+ data_type = TCG_TYPE_I32;
501
+ break;
502
+ case MO_64:
503
+ info = &info_helper_st64_mmu;
504
+ data_type = TCG_TYPE_I64;
505
+ break;
506
+ default:
507
+ g_assert_not_reached();
508
+ }
509
+
510
+ /* Defer env argument. */
511
+ next_arg = 1;
512
+ nmov = 0;
513
+
514
+ /* Handle addr argument. */
515
+ loc = &info->in[next_arg];
516
+ n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
517
+ ldst->addrlo_reg, ldst->addrhi_reg);
518
+ next_arg += n;
519
+ nmov += n;
520
+
521
+ /* Handle data argument. */
522
+ loc = &info->in[next_arg];
523
+ n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
524
+ ldst->datalo_reg, ldst->datahi_reg);
525
+ next_arg += n;
526
+ nmov += n;
527
+ tcg_debug_assert(nmov <= ARRAY_SIZE(mov));
528
+
529
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
530
+ tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
531
+}
532
+
533
#ifdef CONFIG_PROFILER
534
535
/* avoid copy/paste errors */
536
--
537
2.34.1
538
539
New patch
1
Use tcg_out_ld_helper_args and tcg_out_ld_helper_ret.
1
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/i386/tcg-target.c.inc | 71 +++++++++++++++------------------------
7
1 file changed, 28 insertions(+), 43 deletions(-)
8
9
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/i386/tcg-target.c.inc
12
+++ b/tcg/i386/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
14
[MO_BEUQ] = helper_be_stq_mmu,
15
};
16
17
+/*
18
+ * Because i686 has no register parameters and because x86_64 has xchg
19
+ * to handle addr/data register overlap, we have placed all input arguments
20
+ * before we might need a scratch reg.
21
+ *
22
+ * Even then, a scratch is only needed for l->raddr. Rather than expose
23
+ * a general-purpose scratch when we don't actually know it's available,
24
+ * use the ra_gen hook to load into RAX if needed.
25
+ */
26
+#if TCG_TARGET_REG_BITS == 64
27
+static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
28
+{
29
+ if (arg < 0) {
30
+ arg = TCG_REG_RAX;
31
+ }
32
+ tcg_out_movi(s, TCG_TYPE_PTR, arg, (uintptr_t)l->raddr);
33
+ return arg;
34
+}
35
+static const TCGLdstHelperParam ldst_helper_param = {
36
+ .ra_gen = ldst_ra_gen
37
+};
38
+#else
39
+static const TCGLdstHelperParam ldst_helper_param = { };
40
+#endif
41
+
42
/*
43
* Generate code for the slow path for a load at the end of block
44
*/
45
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
46
{
47
- MemOpIdx oi = l->oi;
48
- MemOp opc = get_memop(oi);
49
+ MemOp opc = get_memop(l->oi);
50
tcg_insn_unit **label_ptr = &l->label_ptr[0];
51
52
/* resolve label address */
53
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
54
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
55
}
56
57
- if (TCG_TARGET_REG_BITS == 32) {
58
- int ofs = 0;
59
-
60
- tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
61
- ofs += 4;
62
-
63
- tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
64
- ofs += 4;
65
-
66
- if (TARGET_LONG_BITS == 64) {
67
- tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
68
- ofs += 4;
69
- }
70
-
71
- tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
72
- ofs += 4;
73
-
74
- tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
75
- } else {
76
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
77
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
78
- l->addrlo_reg);
79
- tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
80
- tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
81
- (uintptr_t)l->raddr);
82
- }
83
-
84
+ tcg_out_ld_helper_args(s, l, &ldst_helper_param);
85
tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
86
+ tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
87
88
- if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
89
- TCGMovExtend ext[2] = {
90
- { .dst = l->datalo_reg, .dst_type = TCG_TYPE_I32,
91
- .src = TCG_REG_EAX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
92
- { .dst = l->datahi_reg, .dst_type = TCG_TYPE_I32,
93
- .src = TCG_REG_EDX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
94
- };
95
- tcg_out_movext2(s, &ext[0], &ext[1], -1);
96
- } else {
97
- tcg_out_movext(s, l->type, l->datalo_reg,
98
- TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_EAX);
99
- }
100
-
101
- /* Jump to the code corresponding to next IR of qemu_st */
102
tcg_out_jmp(s, l->raddr);
103
return true;
104
}
105
--
106
2.34.1
107
108
New patch
1
Use tcg_out_st_helper_args. This eliminates the use of a tail call to
2
the store helper. This may or may not be an improvement, depending on
3
the call/return branch prediction of the host microarchitecture.
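One way to picture the branch-prediction point: hardware return-address stacks pair calls with returns, and the old "push retaddr; jmp helper" shape makes the helper's ret pop an entry that no call pushed. The toy model below is purely illustrative, not QEMU code and not a claim about any particular CPU.

    #include <stdio.h>

    #define RAS_DEPTH 16
    static unsigned long ras[RAS_DEPTH];
    static int ras_top;

    static void hw_call(unsigned long return_to)
    {
        if (ras_top < RAS_DEPTH) {
            ras[ras_top++] = return_to;
        }
    }

    /* Returns 1 if the predictor would have guessed the ret target. */
    static int hw_ret(unsigned long actual_target)
    {
        unsigned long predicted = ras_top > 0 ? ras[--ras_top] : 0;
        return predicted == actual_target;
    }

    int main(void)
    {
        /* Old shape: push raddr on the data stack, then jmp to the helper;
         * the helper's ret goes to raddr, but no hardware call pushed it. */
        ras_top = 0;
        hw_call(0x100);                              /* unrelated outer call */
        printf("push+jmp : ret predicted = %d\n", hw_ret(0x2000));

        /* New shape: a real call/ret pair, then a plain jmp back to raddr. */
        ras_top = 0;
        hw_call(0x100);
        hw_call(0x3000);                             /* call to the st helper */
        printf("call+ret : ret predicted = %d\n", hw_ret(0x3000));
        return 0;
    }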
1
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/i386/tcg-target.c.inc | 57 +++------------------------------------
9
1 file changed, 4 insertions(+), 53 deletions(-)
10
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/i386/tcg-target.c.inc
14
+++ b/tcg/i386/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
16
*/
17
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
18
{
19
- MemOpIdx oi = l->oi;
20
- MemOp opc = get_memop(oi);
21
- MemOp s_bits = opc & MO_SIZE;
22
+ MemOp opc = get_memop(l->oi);
23
tcg_insn_unit **label_ptr = &l->label_ptr[0];
24
- TCGReg retaddr;
25
26
/* resolve label address */
27
tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
28
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
29
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
30
}
31
32
- if (TCG_TARGET_REG_BITS == 32) {
33
- int ofs = 0;
34
+ tcg_out_st_helper_args(s, l, &ldst_helper_param);
35
+ tcg_out_branch(s, 1, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
36
37
- tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
38
- ofs += 4;
39
-
40
- tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
41
- ofs += 4;
42
-
43
- if (TARGET_LONG_BITS == 64) {
44
- tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
45
- ofs += 4;
46
- }
47
-
48
- tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
49
- ofs += 4;
50
-
51
- if (s_bits == MO_64) {
52
- tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
53
- ofs += 4;
54
- }
55
-
56
- tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
57
- ofs += 4;
58
-
59
- retaddr = TCG_REG_EAX;
60
- tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
61
- tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
62
- } else {
63
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
64
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
65
- l->addrlo_reg);
66
- tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
67
- tcg_target_call_iarg_regs[2], l->datalo_reg);
68
- tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);
69
-
70
- if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
71
- retaddr = tcg_target_call_iarg_regs[4];
72
- tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
73
- } else {
74
- retaddr = TCG_REG_RAX;
75
- tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
76
- tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
77
- TCG_TARGET_CALL_STACK_OFFSET);
78
- }
79
- }
80
-
81
- /* "Tail call" to the helper, with the return address back inline. */
82
- tcg_out_push(s, retaddr);
83
- tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
84
+ tcg_out_jmp(s, l->raddr);
85
return true;
86
}
87
#else
88
--
89
2.34.1
90
91
New patch
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
2
and tcg_out_st_helper_args.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/aarch64/tcg-target.c.inc | 40 +++++++++++++++---------------------
8
1 file changed, 16 insertions(+), 24 deletions(-)
9
10
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/aarch64/tcg-target.c.inc
13
+++ b/tcg/aarch64/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
15
}
16
}
17
18
-static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
19
-{
20
- ptrdiff_t offset = tcg_pcrel_diff(s, target);
21
- tcg_debug_assert(offset == sextract64(offset, 0, 21));
22
- tcg_out_insn(s, 3406, ADR, rd, offset);
23
-}
24
-
25
typedef struct {
26
TCGReg base;
27
TCGReg index;
28
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[MO_SIZE + 1] = {
29
#endif
30
};
31
32
+static const TCGLdstHelperParam ldst_helper_param = {
33
+ .ntmp = 1, .tmp = { TCG_REG_TMP }
34
+};
35
+
36
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
37
{
38
- MemOpIdx oi = lb->oi;
39
- MemOp opc = get_memop(oi);
40
+ MemOp opc = get_memop(lb->oi);
41
42
if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
43
return false;
44
}
45
46
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
47
- tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
48
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
49
- tcg_out_adr(s, TCG_REG_X3, lb->raddr);
50
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
51
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
52
-
53
- tcg_out_movext(s, lb->type, lb->datalo_reg,
54
- TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_X0);
55
+ tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
56
tcg_out_goto(s, lb->raddr);
57
return true;
58
}
59
60
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
61
{
62
- MemOpIdx oi = lb->oi;
63
- MemOp opc = get_memop(oi);
64
- MemOp size = opc & MO_SIZE;
65
+ MemOp opc = get_memop(lb->oi);
66
67
if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
68
return false;
69
}
70
71
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
72
- tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
73
- tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
74
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
75
- tcg_out_adr(s, TCG_REG_X4, lb->raddr);
76
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
77
tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
78
tcg_out_goto(s, lb->raddr);
79
return true;
80
}
81
#else
82
+static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
83
+{
84
+ ptrdiff_t offset = tcg_pcrel_diff(s, target);
85
+ tcg_debug_assert(offset == sextract64(offset, 0, 21));
86
+ tcg_out_insn(s, 3406, ADR, rd, offset);
87
+}
88
+
89
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
90
{
91
if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
92
--
93
2.34.1
94
95
New patch
1
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
2
and tcg_out_st_helper_args. This allows our local
3
tcg_out_arg_* infrastructure to be removed.
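For context on what the removed tcg_out_arg_reg64 enforced: the 32-bit ARM calling convention places a 64-bit argument in an even/odd core-register pair (or an 8-aligned stack slot), so an odd next-argument index is rounded up. A minimal sketch of just that rounding rule; the slot numbering is hypothetical.

    #include <stdio.h>

    /* Slots 0-3 stand for r0-r3, 4 and up for 4-byte stack slots. */
    static int place_i64(int argreg)
    {
        if (argreg & 1) {
            argreg++;            /* round up to an even register/slot */
        }
        return argreg;           /* the value occupies argreg and argreg + 1 */
    }

    int main(void)
    {
        int next = 1;            /* r0 already taken by env */
        int pair = place_i64(next);
        printf("64-bit guest address lands in r%d:r%d\n", pair, pair + 1);
        return 0;
    }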
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/arm/tcg-target.c.inc | 140 +++++----------------------------------
9
1 file changed, 18 insertions(+), 122 deletions(-)
10
11
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/arm/tcg-target.c.inc
14
+++ b/tcg/arm/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ tcg_out_ldrd_rwb(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, TCGReg rm)
16
tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 1);
17
}
18
19
-static void tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt,
20
- TCGReg rn, int imm8)
21
+static void __attribute__((unused))
22
+tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, int imm8)
23
{
24
tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
25
}
26
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
27
tcg_out_dat_imm(s, COND_AL, ARITH_AND, rd, rn, 0xff);
28
}
29
30
-static void __attribute__((unused))
31
-tcg_out_ext8u_cond(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
32
-{
33
- tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
34
-}
35
-
36
static void tcg_out_ext16s(TCGContext *s, TCGType t, TCGReg rd, TCGReg rn)
37
{
38
/* sxth */
39
tcg_out32(s, 0x06bf0070 | (COND_AL << 28) | (rd << 12) | rn);
40
}
41
42
-static void tcg_out_ext16u_cond(TCGContext *s, ARMCond cond,
43
- TCGReg rd, TCGReg rn)
44
-{
45
- /* uxth */
46
- tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
47
-}
48
-
49
static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
50
{
51
- tcg_out_ext16u_cond(s, COND_AL, rd, rn);
52
+ /* uxth */
53
+ tcg_out32(s, 0x06ff0070 | (COND_AL << 28) | (rd << 12) | rn);
54
}
55
56
static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
57
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[MO_SIZE + 1] = {
58
#endif
59
};
60
61
-/* Helper routines for marshalling helper function arguments into
62
- * the correct registers and stack.
63
- * argreg is where we want to put this argument, arg is the argument itself.
64
- * Return value is the updated argreg ready for the next call.
65
- * Note that argreg 0..3 is real registers, 4+ on stack.
66
- *
67
- * We provide routines for arguments which are: immediate, 32 bit
68
- * value in register, 16 and 8 bit values in register (which must be zero
69
- * extended before use) and 64 bit value in a lo:hi register pair.
70
- */
71
-#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \
72
-static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \
73
-{ \
74
- if (argreg < 4) { \
75
- MOV_ARG(s, COND_AL, argreg, arg); \
76
- } else { \
77
- int ofs = (argreg - 4) * 4; \
78
- EXT_ARG; \
79
- tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \
80
- tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \
81
- } \
82
- return argreg + 1; \
83
-}
84
-
85
-DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
86
- (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
87
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u_cond,
88
- (tcg_out_ext8u_cond(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
89
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u_cond,
90
- (tcg_out_ext16u_cond(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
91
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
92
-
93
-static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
94
- TCGReg arglo, TCGReg arghi)
95
+static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
96
{
97
- /* 64 bit arguments must go in even/odd register pairs
98
- * and in 8-aligned stack slots.
99
- */
100
- if (argreg & 1) {
101
- argreg++;
102
- }
103
- if (argreg >= 4 && (arglo & 1) == 0 && arghi == arglo + 1) {
104
- tcg_out_strd_8(s, COND_AL, arglo,
105
- TCG_REG_CALL_STACK, (argreg - 4) * 4);
106
- return argreg + 2;
107
- } else {
108
- argreg = tcg_out_arg_reg32(s, argreg, arglo);
109
- argreg = tcg_out_arg_reg32(s, argreg, arghi);
110
- return argreg;
111
- }
112
+ /* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */
113
+ return TCG_REG_R14;
114
}
115
116
+static const TCGLdstHelperParam ldst_helper_param = {
117
+ .ra_gen = ldst_ra_gen,
118
+ .ntmp = 1,
119
+ .tmp = { TCG_REG_TMP },
120
+};
121
+
122
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
123
{
124
- TCGReg argreg;
125
- MemOpIdx oi = lb->oi;
126
- MemOp opc = get_memop(oi);
127
+ MemOp opc = get_memop(lb->oi);
128
129
if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
130
return false;
131
}
132
133
- argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
134
- if (TARGET_LONG_BITS == 64) {
135
- argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
136
- } else {
137
- argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
138
- }
139
- argreg = tcg_out_arg_imm32(s, argreg, oi);
140
- argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
141
-
142
- /* Use the canonical unsigned helpers and minimize icache usage. */
143
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
144
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
145
-
146
- if ((opc & MO_SIZE) == MO_64) {
147
- TCGMovExtend ext[2] = {
148
- { .dst = lb->datalo_reg, .dst_type = TCG_TYPE_I32,
149
- .src = TCG_REG_R0, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
150
- { .dst = lb->datahi_reg, .dst_type = TCG_TYPE_I32,
151
- .src = TCG_REG_R1, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
152
- };
153
- tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
154
- } else {
155
- tcg_out_movext(s, TCG_TYPE_I32, lb->datalo_reg,
156
- TCG_TYPE_I32, opc & MO_SSIZE, TCG_REG_R0);
157
- }
158
+ tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
159
160
tcg_out_goto(s, COND_AL, lb->raddr);
161
return true;
162
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
163
164
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
165
{
166
- TCGReg argreg, datalo, datahi;
167
- MemOpIdx oi = lb->oi;
168
- MemOp opc = get_memop(oi);
169
+ MemOp opc = get_memop(lb->oi);
170
171
if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
172
return false;
173
}
174
175
- argreg = TCG_REG_R0;
176
- argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
177
- if (TARGET_LONG_BITS == 64) {
178
- argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
179
- } else {
180
- argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
181
- }
182
-
183
- datalo = lb->datalo_reg;
184
- datahi = lb->datahi_reg;
185
- switch (opc & MO_SIZE) {
186
- case MO_8:
187
- argreg = tcg_out_arg_reg8(s, argreg, datalo);
188
- break;
189
- case MO_16:
190
- argreg = tcg_out_arg_reg16(s, argreg, datalo);
191
- break;
192
- case MO_32:
193
- default:
194
- argreg = tcg_out_arg_reg32(s, argreg, datalo);
195
- break;
196
- case MO_64:
197
- argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
198
- break;
199
- }
200
-
201
- argreg = tcg_out_arg_imm32(s, argreg, oi);
202
- argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
203
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
204
205
/* Tail-call to the helper, which will return to the fast path. */
206
tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]);
207
--
208
2.34.1
209
210
New patch
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
2
and tcg_out_st_helper_args.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/loongarch64/tcg-target.c.inc | 37 ++++++++++----------------------
8
1 file changed, 11 insertions(+), 26 deletions(-)
9
10
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/loongarch64/tcg-target.c.inc
13
+++ b/tcg/loongarch64/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
15
return reloc_br_sd10k16(s->code_ptr - 1, target);
16
}
17
18
+static const TCGLdstHelperParam ldst_helper_param = {
19
+ .ntmp = 1, .tmp = { TCG_REG_TMP0 }
20
+};
21
+
22
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
23
{
24
- MemOpIdx oi = l->oi;
25
- MemOp opc = get_memop(oi);
26
- MemOp size = opc & MO_SIZE;
27
+ MemOp opc = get_memop(l->oi);
28
29
/* resolve label address */
30
if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
31
return false;
32
}
33
34
- /* call load helper */
35
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
36
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg);
37
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A2, oi);
38
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, (tcg_target_long)l->raddr);
39
-
40
- tcg_out_call_int(s, qemu_ld_helpers[size], false);
41
-
42
- tcg_out_movext(s, l->type, l->datalo_reg,
43
- TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_A0);
44
+ tcg_out_ld_helper_args(s, l, &ldst_helper_param);
45
+ tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE], false);
46
+ tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
47
return tcg_out_goto(s, l->raddr);
48
}
49
50
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
51
{
52
- MemOpIdx oi = l->oi;
53
- MemOp opc = get_memop(oi);
54
- MemOp size = opc & MO_SIZE;
55
+ MemOp opc = get_memop(l->oi);
56
57
/* resolve label address */
58
if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
59
return false;
60
}
61
62
- /* call store helper */
63
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
64
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg);
65
- tcg_out_movext(s, size == MO_64 ? TCG_TYPE_I32 : TCG_TYPE_I32, TCG_REG_A2,
66
- l->type, size, l->datalo_reg);
67
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, oi);
68
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A4, (tcg_target_long)l->raddr);
69
-
70
- tcg_out_call_int(s, qemu_st_helpers[size], false);
71
-
72
+ tcg_out_st_helper_args(s, l, &ldst_helper_param);
73
+ tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
74
return tcg_out_goto(s, l->raddr);
75
}
76
#else
77
--
78
2.34.1
79
80
New patch
1
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
2
and tcg_out_st_helper_args. This allows our local
3
tcg_out_arg_* infrastructure to be removed.
4
5
We are no longer filling the call or return branch
6
delay slots, nor are we tail-calling for the store,
7
but this seems a small price to pay.
8
9
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
tcg/mips/tcg-target.c.inc | 154 ++++++--------------------------------
13
1 file changed, 22 insertions(+), 132 deletions(-)
14
15
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
16
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/mips/tcg-target.c.inc
18
+++ b/tcg/mips/tcg-target.c.inc
19
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
20
[MO_BEUQ] = helper_be_stq_mmu,
21
};
22
23
-/* Helper routines for marshalling helper function arguments into
24
- * the correct registers and stack.
25
- * I is where we want to put this argument, and is updated and returned
26
- * for the next call. ARG is the argument itself.
27
- *
28
- * We provide routines for arguments which are: immediate, 32 bit
29
- * value in register, 16 and 8 bit values in register (which must be zero
30
- * extended before use) and 64 bit value in a lo:hi register pair.
31
- */
32
-
33
-static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg)
34
-{
35
- if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
36
- tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg);
37
- } else {
38
- /* For N32 and N64, the initial offset is different. But there
39
- we also have 8 argument register so we don't run out here. */
40
- tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
41
- tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i);
42
- }
43
- return i + 1;
44
-}
45
-
46
-static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg)
47
-{
48
- TCGReg tmp = TCG_TMP0;
49
- if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
50
- tmp = tcg_target_call_iarg_regs[i];
51
- }
52
- tcg_out_ext8u(s, tmp, arg);
53
- return tcg_out_call_iarg_reg(s, i, tmp);
54
-}
55
-
56
-static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg)
57
-{
58
- TCGReg tmp = TCG_TMP0;
59
- if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
60
- tmp = tcg_target_call_iarg_regs[i];
61
- }
62
- tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xffff);
63
- return tcg_out_call_iarg_reg(s, i, tmp);
64
-}
65
-
66
-static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg)
67
-{
68
- TCGReg tmp = TCG_TMP0;
69
- if (arg == 0) {
70
- tmp = TCG_REG_ZERO;
71
- } else {
72
- if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
73
- tmp = tcg_target_call_iarg_regs[i];
74
- }
75
- tcg_out_movi(s, TCG_TYPE_REG, tmp, arg);
76
- }
77
- return tcg_out_call_iarg_reg(s, i, tmp);
78
-}
79
-
80
-static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah)
81
-{
82
- tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
83
- i = (i + 1) & ~1;
84
- i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al));
85
- i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah));
86
- return i;
87
-}
88
+/* We have four temps, we might as well expose three of them. */
89
+static const TCGLdstHelperParam ldst_helper_param = {
90
+ .ntmp = 3, .tmp = { TCG_TMP0, TCG_TMP1, TCG_TMP2 }
91
+};
92
93
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
94
{
95
const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
96
- MemOpIdx oi = l->oi;
97
- MemOp opc = get_memop(oi);
98
- TCGReg v0;
99
- int i;
100
+ MemOp opc = get_memop(l->oi);
101
102
/* resolve label address */
103
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
104
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
105
return false;
106
}
107
108
- i = 1;
109
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
110
- i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
111
- } else {
112
- i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
113
- }
114
- i = tcg_out_call_iarg_imm(s, i, oi);
115
- i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr);
116
+ tcg_out_ld_helper_args(s, l, &ldst_helper_param);
117
+
118
tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false);
119
/* delay slot */
120
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
121
+ tcg_out_nop(s);
122
123
- v0 = l->datalo_reg;
124
- if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
125
- /* We eliminated V0 from the possible output registers, so it
126
- cannot be clobbered here. So we must move V1 first. */
127
- if (MIPS_BE) {
128
- tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V1);
129
- v0 = l->datahi_reg;
130
- } else {
131
- tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1);
132
- }
133
- }
134
+ tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param);
135
136
tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO);
137
if (!reloc_pc16(s->code_ptr - 1, l->raddr)) {
138
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
139
}
140
141
/* delay slot */
142
- if (TCG_TARGET_REG_BITS == 64 && l->type == TCG_TYPE_I32) {
143
- /* we always sign-extend 32-bit loads */
144
- tcg_out_ext32s(s, v0, TCG_REG_V0);
145
- } else {
146
- tcg_out_opc_reg(s, OPC_OR, v0, TCG_REG_V0, TCG_REG_ZERO);
147
- }
148
+ tcg_out_nop(s);
149
return true;
150
}
151
152
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
153
{
154
const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
155
- MemOpIdx oi = l->oi;
156
- MemOp opc = get_memop(oi);
157
- MemOp s_bits = opc & MO_SIZE;
158
- int i;
159
+ MemOp opc = get_memop(l->oi);
160
161
/* resolve label address */
162
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
163
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
164
return false;
165
}
166
167
- i = 1;
168
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
169
- i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
170
- } else {
171
- i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
172
- }
173
- switch (s_bits) {
174
- case MO_8:
175
- i = tcg_out_call_iarg_reg8(s, i, l->datalo_reg);
176
- break;
177
- case MO_16:
178
- i = tcg_out_call_iarg_reg16(s, i, l->datalo_reg);
179
- break;
180
- case MO_32:
181
- i = tcg_out_call_iarg_reg(s, i, l->datalo_reg);
182
- break;
183
- case MO_64:
184
- if (TCG_TARGET_REG_BITS == 32) {
185
- i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg);
186
- } else {
187
- i = tcg_out_call_iarg_reg(s, i, l->datalo_reg);
188
- }
189
- break;
190
- default:
191
- g_assert_not_reached();
192
- }
193
- i = tcg_out_call_iarg_imm(s, i, oi);
194
+ tcg_out_st_helper_args(s, l, &ldst_helper_param);
195
196
- /* Tail call to the store helper. Thus force the return address
197
- computation to take place in the return address register. */
198
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr);
199
- i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA);
200
- tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true);
201
+ tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], false);
202
/* delay slot */
203
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
204
+ tcg_out_nop(s);
205
+
206
+ tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO);
207
+ if (!reloc_pc16(s->code_ptr - 1, l->raddr)) {
208
+ return false;
209
+ }
210
+
211
+ /* delay slot */
212
+ tcg_out_nop(s);
213
return true;
214
}
215
216
--
217
2.34.1
218
219
New patch
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
2
and tcg_out_st_helper_args.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/ppc/tcg-target.c.inc | 88 ++++++++++++----------------------------
9
1 file changed, 26 insertions(+), 62 deletions(-)
10
11
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.c.inc
14
+++ b/tcg/ppc/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
16
[MO_BEUQ] = helper_be_stq_mmu,
17
};
18
19
+static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
20
+{
21
+ if (arg < 0) {
22
+ arg = TCG_REG_TMP1;
23
+ }
24
+ tcg_out32(s, MFSPR | RT(arg) | LR);
25
+ return arg;
26
+}
27
+
28
+/*
29
+ * For the purposes of ppc32 sorting 4 input registers into 4 argument
30
+ * registers, there is an outside chance we would require 3 temps.
31
+ * Because of constraints, no inputs are in r3, and env will not be
32
+ * placed into r3 until after the sorting is done, and is thus free.
33
+ */
34
+static const TCGLdstHelperParam ldst_helper_param = {
35
+ .ra_gen = ldst_ra_gen,
36
+ .ntmp = 3,
37
+ .tmp = { TCG_REG_TMP1, TCG_REG_R0, TCG_REG_R3 }
38
+};
39
+
40
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
41
{
42
- MemOpIdx oi = lb->oi;
43
- MemOp opc = get_memop(oi);
44
- TCGReg hi, lo, arg = TCG_REG_R3;
45
+ MemOp opc = get_memop(lb->oi);
46
47
if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
48
return false;
49
}
50
51
- tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
52
-
53
- lo = lb->addrlo_reg;
54
- hi = lb->addrhi_reg;
55
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
56
- arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
57
- tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
58
- tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
59
- } else {
60
- /* If the address needed to be zero-extended, we'll have already
61
- placed it in R4. The only remaining case is 64-bit guest. */
62
- tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
63
- }
64
-
65
- tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
66
- tcg_out32(s, MFSPR | RT(arg) | LR);
67
-
68
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
69
tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
70
-
71
- lo = lb->datalo_reg;
72
- hi = lb->datahi_reg;
73
- if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
74
- tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
75
- tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
76
- } else {
77
- tcg_out_movext(s, lb->type, lo,
78
- TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_R3);
79
- }
80
+ tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
81
82
tcg_out_b(s, 0, lb->raddr);
83
return true;
84
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
85
86
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
87
{
88
- MemOpIdx oi = lb->oi;
89
- MemOp opc = get_memop(oi);
90
- MemOp s_bits = opc & MO_SIZE;
91
- TCGReg hi, lo, arg = TCG_REG_R3;
92
+ MemOp opc = get_memop(lb->oi);
93
94
if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
95
return false;
96
}
97
98
- tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
99
-
100
- lo = lb->addrlo_reg;
101
- hi = lb->addrhi_reg;
102
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
103
- arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
104
- tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
105
- tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
106
- } else {
107
- /* If the address needed to be zero-extended, we'll have already
108
- placed it in R4. The only remaining case is 64-bit guest. */
109
- tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
110
- }
111
-
112
- lo = lb->datalo_reg;
113
- hi = lb->datahi_reg;
114
- if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
115
- arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
116
- tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
117
- tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
118
- } else {
119
- tcg_out_movext(s, s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
120
- arg++, lb->type, s_bits, lo);
121
- }
122
-
123
- tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
124
- tcg_out32(s, MFSPR | RT(arg) | LR);
125
-
126
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
127
tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
128
129
tcg_out_b(s, 0, lb->raddr);
130
--
131
2.34.1
132
133
New patch
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
2
and tcg_out_st_helper_args.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/riscv/tcg-target.c.inc | 37 ++++++++++---------------------------
9
1 file changed, 10 insertions(+), 27 deletions(-)
10
11
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/riscv/tcg-target.c.inc
14
+++ b/tcg/riscv/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
16
tcg_debug_assert(ok);
17
}
18
19
+/* We have three temps, we might as well expose them. */
20
+static const TCGLdstHelperParam ldst_helper_param = {
21
+ .ntmp = 3, .tmp = { TCG_REG_TMP0, TCG_REG_TMP1, TCG_REG_TMP2 }
22
+};
23
+
24
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
25
{
26
- MemOpIdx oi = l->oi;
27
- MemOp opc = get_memop(oi);
28
- TCGReg a0 = tcg_target_call_iarg_regs[0];
29
- TCGReg a1 = tcg_target_call_iarg_regs[1];
30
- TCGReg a2 = tcg_target_call_iarg_regs[2];
31
- TCGReg a3 = tcg_target_call_iarg_regs[3];
32
+ MemOp opc = get_memop(l->oi);
33
34
/* resolve label address */
35
if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
36
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
37
}
38
39
/* call load helper */
40
- tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0);
41
- tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg);
42
- tcg_out_movi(s, TCG_TYPE_PTR, a2, oi);
43
- tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr);
44
-
45
+ tcg_out_ld_helper_args(s, l, &ldst_helper_param);
46
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false);
47
- tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0);
48
+ tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param);
49
50
tcg_out_goto(s, l->raddr);
51
return true;
52
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
53
54
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
55
{
56
- MemOpIdx oi = l->oi;
57
- MemOp opc = get_memop(oi);
58
- MemOp s_bits = opc & MO_SIZE;
59
- TCGReg a0 = tcg_target_call_iarg_regs[0];
60
- TCGReg a1 = tcg_target_call_iarg_regs[1];
61
- TCGReg a2 = tcg_target_call_iarg_regs[2];
62
- TCGReg a3 = tcg_target_call_iarg_regs[3];
63
- TCGReg a4 = tcg_target_call_iarg_regs[4];
64
+ MemOp opc = get_memop(l->oi);
65
66
/* resolve label address */
67
if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
68
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
69
}
70
71
/* call store helper */
72
- tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0);
73
- tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg);
74
- tcg_out_movext(s, s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32, a2,
75
- l->type, s_bits, l->datalo_reg);
76
- tcg_out_movi(s, TCG_TYPE_PTR, a3, oi);
77
- tcg_out_movi(s, TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr);
78
-
79
+ tcg_out_st_helper_args(s, l, &ldst_helper_param);
80
tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
81
82
tcg_out_goto(s, l->raddr);
83
--
84
2.34.1
85
86
New patch
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
2
and tcg_out_st_helper_args.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/s390x/tcg-target.c.inc | 35 ++++++++++-------------------------
8
1 file changed, 10 insertions(+), 25 deletions(-)
9
10
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/s390x/tcg-target.c.inc
13
+++ b/tcg/s390x/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
15
}
16
17
#if defined(CONFIG_SOFTMMU)
18
+static const TCGLdstHelperParam ldst_helper_param = {
19
+ .ntmp = 1, .tmp = { TCG_TMP0 }
20
+};
21
+
22
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
23
{
24
- TCGReg addr_reg = lb->addrlo_reg;
25
- TCGReg data_reg = lb->datalo_reg;
26
- MemOpIdx oi = lb->oi;
27
- MemOp opc = get_memop(oi);
28
+ MemOp opc = get_memop(lb->oi);
29
30
if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
31
(intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
32
return false;
33
}
34
35
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
36
- if (TARGET_LONG_BITS == 64) {
37
- tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
38
- }
39
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
40
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
41
- tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
42
- tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
43
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
44
+ tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
45
+ tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
46
47
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
48
return true;
49
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
50
51
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
52
{
53
- TCGReg addr_reg = lb->addrlo_reg;
54
- TCGReg data_reg = lb->datalo_reg;
55
- MemOpIdx oi = lb->oi;
56
- MemOp opc = get_memop(oi);
57
- MemOp size = opc & MO_SIZE;
58
+ MemOp opc = get_memop(lb->oi);
59
60
if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
61
(intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
62
return false;
63
}
64
65
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
66
- if (TARGET_LONG_BITS == 64) {
67
- tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
68
- }
69
- tcg_out_movext(s, size == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
70
- TCG_REG_R4, lb->type, size, data_reg);
71
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
72
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
73
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
74
tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
75
76
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
77
--
78
2.34.1
79
80
New patch
1
The softmmu tlb uses TCG_REG_TMP[0-2], not any of the normally available
2
registers.  Now that we handle overlap between inputs and helper arguments,
3
we can allow any allocatable reg.
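The overlap in question is the classic parallel-move problem: an input register can also be the destination of another argument move, so the common helpers order the moves and fall back to a scratch only for a full swap. A toy version of that decision, with invented register numbering:

    #include <stdio.h>

    static void move2(int *reg, int dst0, int src0, int dst1, int src1, int tmp)
    {
        if (dst0 != src1) {           /* no overlap: either order works */
            reg[dst0] = reg[src0];
            reg[dst1] = reg[src1];
        } else if (dst1 != src0) {    /* one-way overlap: order matters */
            reg[dst1] = reg[src1];
            reg[dst0] = reg[src0];
        } else {                      /* full swap: needs the scratch */
            reg[tmp] = reg[src0];
            reg[dst1] = reg[src1];
            reg[dst0] = reg[tmp];
        }
    }

    int main(void)
    {
        int reg[8] = { 0, 11, 22, 33 };
        move2(reg, 1, 2, 2, 1, 7);    /* swap r1 and r2 via the scratch r7 */
        printf("r1=%d r2=%d\n", reg[1], reg[2]);   /* r1=22 r2=11 */
        return 0;
    }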
1
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/loongarch64/tcg-target-con-set.h | 2 --
9
tcg/loongarch64/tcg-target-con-str.h | 1 -
10
tcg/loongarch64/tcg-target.c.inc | 23 ++++-------------------
11
3 files changed, 4 insertions(+), 22 deletions(-)
12
13
diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/loongarch64/tcg-target-con-set.h
16
+++ b/tcg/loongarch64/tcg-target-con-set.h
17
@@ -XXX,XX +XXX,XX @@
18
C_O0_I1(r)
19
C_O0_I2(rZ, r)
20
C_O0_I2(rZ, rZ)
21
-C_O0_I2(LZ, L)
22
C_O1_I1(r, r)
23
-C_O1_I1(r, L)
24
C_O1_I2(r, r, rC)
25
C_O1_I2(r, r, ri)
26
C_O1_I2(r, r, rI)
27
diff --git a/tcg/loongarch64/tcg-target-con-str.h b/tcg/loongarch64/tcg-target-con-str.h
28
index XXXXXXX..XXXXXXX 100644
29
--- a/tcg/loongarch64/tcg-target-con-str.h
30
+++ b/tcg/loongarch64/tcg-target-con-str.h
31
@@ -XXX,XX +XXX,XX @@
32
* REGS(letter, register_mask)
33
*/
34
REGS('r', ALL_GENERAL_REGS)
35
-REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
36
37
/*
38
* Define constraint letters for constants:
39
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
40
index XXXXXXX..XXXXXXX 100644
41
--- a/tcg/loongarch64/tcg-target.c.inc
42
+++ b/tcg/loongarch64/tcg-target.c.inc
43
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
44
#define TCG_CT_CONST_C12 0x1000
45
#define TCG_CT_CONST_WSZ 0x2000
46
47
-#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
48
-/*
49
- * For softmmu, we need to avoid conflicts with the first 5
50
- * argument registers to call the helper. Some of these are
51
- * also used for the tlb lookup.
52
- */
53
-#ifdef CONFIG_SOFTMMU
54
-#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_A0, 5)
55
-#else
56
-#define SOFTMMU_RESERVE_REGS 0
57
-#endif
58
-
59
+#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
60
61
static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
62
{
63
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
64
case INDEX_op_st32_i64:
65
case INDEX_op_st_i32:
66
case INDEX_op_st_i64:
67
+ case INDEX_op_qemu_st_i32:
68
+ case INDEX_op_qemu_st_i64:
69
return C_O0_I2(rZ, r);
70
71
case INDEX_op_brcond_i32:
72
case INDEX_op_brcond_i64:
73
return C_O0_I2(rZ, rZ);
74
75
- case INDEX_op_qemu_st_i32:
76
- case INDEX_op_qemu_st_i64:
77
- return C_O0_I2(LZ, L);
78
-
79
case INDEX_op_ext8s_i32:
80
case INDEX_op_ext8s_i64:
81
case INDEX_op_ext8u_i32:
82
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
83
case INDEX_op_ld32u_i64:
84
case INDEX_op_ld_i32:
85
case INDEX_op_ld_i64:
86
- return C_O1_I1(r, r);
87
-
88
case INDEX_op_qemu_ld_i32:
89
case INDEX_op_qemu_ld_i64:
90
- return C_O1_I1(r, L);
91
+ return C_O1_I1(r, r);
92
93
case INDEX_op_andc_i32:
94
case INDEX_op_andc_i64:
95
--
96
2.34.1
97
98
New patch
1
While performing the load in the delay slot of the call to the common
2
bswap helper function is cute, it is not worth the added complexity.
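With TCG_TARGET_HAS_MEMORY_BSWAP now 0, a cross-endian guest access reaches the backend as a plain load or store plus a separate byte-swap op emitted by common code; in C terms, the work moved out of the backend amounts to the swap below (illustrative only).

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t bswap32(uint32_t x)
    {
        return (x >> 24)
             | ((x >> 8) & 0x0000ff00u)
             | ((x << 8) & 0x00ff0000u)
             | (x << 24);
    }

    int main(void)
    {
        /* e.g. a big-endian guest load on a little-endian host:
         * one plain 32-bit load, then one explicit swap. */
        uint32_t raw = 0x11223344u;
        printf("0x%08x\n", bswap32(raw));    /* 0x44332211 */
        return 0;
    }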
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/mips/tcg-target.h | 4 +-
8
tcg/mips/tcg-target.c.inc | 284 ++++++--------------------------------
9
2 files changed, 48 insertions(+), 240 deletions(-)
10
11
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/mips/tcg-target.h
14
+++ b/tcg/mips/tcg-target.h
15
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
16
#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */
17
#endif
18
19
-#define TCG_TARGET_DEFAULT_MO (0)
20
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
21
+#define TCG_TARGET_DEFAULT_MO 0
22
+#define TCG_TARGET_HAS_MEMORY_BSWAP 0
23
24
#define TCG_TARGET_NEED_LDST_LABELS
25
26
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
27
index XXXXXXX..XXXXXXX 100644
28
--- a/tcg/mips/tcg-target.c.inc
29
+++ b/tcg/mips/tcg-target.c.inc
30
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
31
}
32
33
#if defined(CONFIG_SOFTMMU)
34
-static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
35
+static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
36
[MO_UB] = helper_ret_ldub_mmu,
37
[MO_SB] = helper_ret_ldsb_mmu,
38
- [MO_LEUW] = helper_le_lduw_mmu,
39
- [MO_LESW] = helper_le_ldsw_mmu,
40
- [MO_LEUL] = helper_le_ldul_mmu,
41
- [MO_LEUQ] = helper_le_ldq_mmu,
42
- [MO_BEUW] = helper_be_lduw_mmu,
43
- [MO_BESW] = helper_be_ldsw_mmu,
44
- [MO_BEUL] = helper_be_ldul_mmu,
45
- [MO_BEUQ] = helper_be_ldq_mmu,
46
-#if TCG_TARGET_REG_BITS == 64
47
- [MO_LESL] = helper_le_ldsl_mmu,
48
- [MO_BESL] = helper_be_ldsl_mmu,
49
+#if HOST_BIG_ENDIAN
50
+ [MO_UW] = helper_be_lduw_mmu,
51
+ [MO_SW] = helper_be_ldsw_mmu,
52
+ [MO_UL] = helper_be_ldul_mmu,
53
+ [MO_SL] = helper_be_ldsl_mmu,
54
+ [MO_UQ] = helper_be_ldq_mmu,
55
+#else
56
+ [MO_UW] = helper_le_lduw_mmu,
57
+ [MO_SW] = helper_le_ldsw_mmu,
58
+ [MO_UL] = helper_le_ldul_mmu,
59
+ [MO_UQ] = helper_le_ldq_mmu,
60
+ [MO_SL] = helper_le_ldsl_mmu,
61
#endif
62
};
63
64
-static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
65
+static void * const qemu_st_helpers[MO_SIZE + 1] = {
66
[MO_UB] = helper_ret_stb_mmu,
67
- [MO_LEUW] = helper_le_stw_mmu,
68
- [MO_LEUL] = helper_le_stl_mmu,
69
- [MO_LEUQ] = helper_le_stq_mmu,
70
- [MO_BEUW] = helper_be_stw_mmu,
71
- [MO_BEUL] = helper_be_stl_mmu,
72
- [MO_BEUQ] = helper_be_stq_mmu,
73
+#if HOST_BIG_ENDIAN
74
+ [MO_UW] = helper_be_stw_mmu,
75
+ [MO_UL] = helper_be_stl_mmu,
76
+ [MO_UQ] = helper_be_stq_mmu,
77
+#else
78
+ [MO_UW] = helper_le_stw_mmu,
79
+ [MO_UL] = helper_le_stl_mmu,
80
+ [MO_UQ] = helper_le_stq_mmu,
81
+#endif
82
};
83
84
/* We have four temps, we might as well expose three of them. */
85
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
86
87
tcg_out_ld_helper_args(s, l, &ldst_helper_param);
88
89
- tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false);
90
+ tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false);
91
/* delay slot */
92
tcg_out_nop(s);
93
94
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
95
96
tcg_out_st_helper_args(s, l, &ldst_helper_param);
97
98
- tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], false);
99
+ tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
100
/* delay slot */
101
tcg_out_nop(s);
102
103
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
104
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
105
TCGReg base, MemOp opc, TCGType type)
106
{
107
- switch (opc & (MO_SSIZE | MO_BSWAP)) {
108
+ switch (opc & MO_SSIZE) {
109
case MO_UB:
110
tcg_out_opc_imm(s, OPC_LBU, lo, base, 0);
111
break;
112
case MO_SB:
113
tcg_out_opc_imm(s, OPC_LB, lo, base, 0);
114
break;
115
- case MO_UW | MO_BSWAP:
116
- tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
117
- tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
118
- break;
119
case MO_UW:
120
tcg_out_opc_imm(s, OPC_LHU, lo, base, 0);
121
break;
122
- case MO_SW | MO_BSWAP:
123
- tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
124
- tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OS);
125
- break;
126
case MO_SW:
127
tcg_out_opc_imm(s, OPC_LH, lo, base, 0);
128
break;
129
- case MO_UL | MO_BSWAP:
130
- if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
131
- if (use_mips32r2_instructions) {
132
- tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
133
- tcg_out_bswap32(s, lo, lo, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
134
- } else {
135
- tcg_out_bswap_subr(s, bswap32u_addr);
136
- /* delay slot */
137
- tcg_out_opc_imm(s, OPC_LWU, TCG_TMP0, base, 0);
138
- tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
139
- }
140
- break;
141
- }
142
- /* FALLTHRU */
143
- case MO_SL | MO_BSWAP:
144
- if (use_mips32r2_instructions) {
145
- tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
146
- tcg_out_bswap32(s, lo, lo, 0);
147
- } else {
148
- tcg_out_bswap_subr(s, bswap32_addr);
149
- /* delay slot */
150
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
151
- tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_TMP3);
152
- }
153
- break;
154
case MO_UL:
155
if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
156
tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
157
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
158
case MO_SL:
159
tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
160
break;
161
- case MO_UQ | MO_BSWAP:
162
- if (TCG_TARGET_REG_BITS == 64) {
163
- if (use_mips32r2_instructions) {
164
- tcg_out_opc_imm(s, OPC_LD, lo, base, 0);
165
- tcg_out_bswap64(s, lo, lo);
166
- } else {
167
- tcg_out_bswap_subr(s, bswap64_addr);
168
- /* delay slot */
169
- tcg_out_opc_imm(s, OPC_LD, TCG_TMP0, base, 0);
170
- tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
171
- }
172
- } else if (use_mips32r2_instructions) {
173
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
174
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 4);
175
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0);
176
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1);
177
- tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16);
178
- tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16);
179
- } else {
180
- tcg_out_bswap_subr(s, bswap32_addr);
181
- /* delay slot */
182
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
183
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 4);
184
- tcg_out_bswap_subr(s, bswap32_addr);
185
- /* delay slot */
186
- tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? lo : hi, TCG_TMP3);
187
- tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? hi : lo, TCG_TMP3);
188
- }
189
- break;
190
case MO_UQ:
191
/* Prefer to load from offset 0 first, but allow for overlap. */
192
if (TCG_TARGET_REG_BITS == 64) {
193
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
194
const MIPSInsn lw2 = MIPS_BE ? OPC_LWR : OPC_LWL;
195
const MIPSInsn ld1 = MIPS_BE ? OPC_LDL : OPC_LDR;
196
const MIPSInsn ld2 = MIPS_BE ? OPC_LDR : OPC_LDL;
197
+ bool sgn = opc & MO_SIGN;
198
199
- bool sgn = (opc & MO_SIGN);
200
-
201
- switch (opc & (MO_SSIZE | MO_BSWAP)) {
202
- case MO_SW | MO_BE:
203
- case MO_UW | MO_BE:
204
- tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 0);
205
- tcg_out_opc_imm(s, OPC_LBU, lo, base, 1);
206
- if (use_mips32r2_instructions) {
207
- tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8);
208
- } else {
209
- tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8);
210
- tcg_out_opc_reg(s, OPC_OR, lo, TCG_TMP0, TCG_TMP1);
211
- }
212
- break;
213
-
214
- case MO_SW | MO_LE:
215
- case MO_UW | MO_LE:
216
- if (use_mips32r2_instructions && lo != base) {
217
+ switch (opc & MO_SIZE) {
218
+ case MO_16:
219
+ if (HOST_BIG_ENDIAN) {
220
+ tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 0);
221
+ tcg_out_opc_imm(s, OPC_LBU, lo, base, 1);
222
+ if (use_mips32r2_instructions) {
223
+ tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8);
224
+ } else {
225
+ tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8);
226
+ tcg_out_opc_reg(s, OPC_OR, lo, lo, TCG_TMP0);
227
+ }
228
+ } else if (use_mips32r2_instructions && lo != base) {
229
tcg_out_opc_imm(s, OPC_LBU, lo, base, 0);
230
tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 1);
231
tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8);
232
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
233
}
234
break;
235
236
- case MO_SL:
237
- case MO_UL:
238
+ case MO_32:
239
tcg_out_opc_imm(s, lw1, lo, base, 0);
240
tcg_out_opc_imm(s, lw2, lo, base, 3);
241
if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64 && !sgn) {
242
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
243
}
244
break;
245
246
- case MO_UL | MO_BSWAP:
247
- case MO_SL | MO_BSWAP:
248
- if (use_mips32r2_instructions) {
249
- tcg_out_opc_imm(s, lw1, lo, base, 0);
250
- tcg_out_opc_imm(s, lw2, lo, base, 3);
251
- tcg_out_bswap32(s, lo, lo,
252
- TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64
253
- ? (sgn ? TCG_BSWAP_OS : TCG_BSWAP_OZ) : 0);
254
- } else {
255
- const tcg_insn_unit *subr =
256
- (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64 && !sgn
257
- ? bswap32u_addr : bswap32_addr);
258
-
259
- tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0);
260
- tcg_out_bswap_subr(s, subr);
261
- /* delay slot */
262
- tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 3);
263
- tcg_out_mov(s, type, lo, TCG_TMP3);
264
- }
265
- break;
266
-
267
- case MO_UQ:
268
+ case MO_64:
269
if (TCG_TARGET_REG_BITS == 64) {
270
tcg_out_opc_imm(s, ld1, lo, base, 0);
271
tcg_out_opc_imm(s, ld2, lo, base, 7);
272
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
273
}
274
break;
275
276
- case MO_UQ | MO_BSWAP:
277
- if (TCG_TARGET_REG_BITS == 64) {
278
- if (use_mips32r2_instructions) {
279
- tcg_out_opc_imm(s, ld1, lo, base, 0);
280
- tcg_out_opc_imm(s, ld2, lo, base, 7);
281
- tcg_out_bswap64(s, lo, lo);
282
- } else {
283
- tcg_out_opc_imm(s, ld1, TCG_TMP0, base, 0);
284
- tcg_out_bswap_subr(s, bswap64_addr);
285
- /* delay slot */
286
- tcg_out_opc_imm(s, ld2, TCG_TMP0, base, 7);
287
- tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
288
- }
289
- } else if (use_mips32r2_instructions) {
290
- tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0 + 0);
291
- tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 0 + 3);
292
- tcg_out_opc_imm(s, lw1, TCG_TMP1, base, 4 + 0);
293
- tcg_out_opc_imm(s, lw2, TCG_TMP1, base, 4 + 3);
294
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0);
295
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1);
296
- tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16);
297
- tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16);
298
- } else {
299
- tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0 + 0);
300
- tcg_out_bswap_subr(s, bswap32_addr);
301
- /* delay slot */
302
- tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 0 + 3);
303
- tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 4 + 0);
304
- tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? lo : hi, TCG_TMP3);
305
- tcg_out_bswap_subr(s, bswap32_addr);
306
- /* delay slot */
307
- tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 4 + 3);
308
- tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? hi : lo, TCG_TMP3);
309
- }
310
- break;
311
-
312
default:
313
g_assert_not_reached();
314
}
315
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
316
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
317
TCGReg base, MemOp opc)
318
{
319
- /* Don't clutter the code below with checks to avoid bswapping ZERO. */
320
- if ((lo | hi) == 0) {
321
- opc &= ~MO_BSWAP;
322
- }
323
-
324
- switch (opc & (MO_SIZE | MO_BSWAP)) {
325
+ switch (opc & MO_SIZE) {
326
case MO_8:
327
tcg_out_opc_imm(s, OPC_SB, lo, base, 0);
328
break;
329
-
330
- case MO_16 | MO_BSWAP:
331
- tcg_out_bswap16(s, TCG_TMP1, lo, 0);
332
- lo = TCG_TMP1;
333
- /* FALLTHRU */
334
case MO_16:
335
tcg_out_opc_imm(s, OPC_SH, lo, base, 0);
336
break;
337
-
338
- case MO_32 | MO_BSWAP:
339
- tcg_out_bswap32(s, TCG_TMP3, lo, 0);
340
- lo = TCG_TMP3;
341
- /* FALLTHRU */
342
case MO_32:
343
tcg_out_opc_imm(s, OPC_SW, lo, base, 0);
344
break;
345
-
346
- case MO_64 | MO_BSWAP:
347
- if (TCG_TARGET_REG_BITS == 64) {
348
- tcg_out_bswap64(s, TCG_TMP3, lo);
349
- tcg_out_opc_imm(s, OPC_SD, TCG_TMP3, base, 0);
350
- } else if (use_mips32r2_instructions) {
351
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? lo : hi);
352
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? hi : lo);
353
- tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16);
354
- tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16);
355
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP0, base, 0);
356
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, 4);
357
- } else {
358
- tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi, 0);
359
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 0);
360
- tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo, 0);
361
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 4);
362
- }
363
- break;
364
case MO_64:
365
if (TCG_TARGET_REG_BITS == 64) {
366
tcg_out_opc_imm(s, OPC_SD, lo, base, 0);
367
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
368
tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? lo : hi, base, 4);
369
}
370
break;
371
-
372
default:
373
g_assert_not_reached();
374
}
375
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
376
const MIPSInsn sd1 = MIPS_BE ? OPC_SDL : OPC_SDR;
377
const MIPSInsn sd2 = MIPS_BE ? OPC_SDR : OPC_SDL;
378
379
- /* Don't clutter the code below with checks to avoid bswapping ZERO. */
380
- if ((lo | hi) == 0) {
381
- opc &= ~MO_BSWAP;
382
- }
383
-
384
- switch (opc & (MO_SIZE | MO_BSWAP)) {
385
- case MO_16 | MO_BE:
386
+ switch (opc & MO_SIZE) {
387
+ case MO_16:
388
tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, lo, 8);
389
- tcg_out_opc_imm(s, OPC_SB, TCG_TMP0, base, 0);
390
- tcg_out_opc_imm(s, OPC_SB, lo, base, 1);
391
+ tcg_out_opc_imm(s, OPC_SB, HOST_BIG_ENDIAN ? TCG_TMP0 : lo, base, 0);
392
+ tcg_out_opc_imm(s, OPC_SB, HOST_BIG_ENDIAN ? lo : TCG_TMP0, base, 1);
393
break;
394
395
- case MO_16 | MO_LE:
396
- tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, lo, 8);
397
- tcg_out_opc_imm(s, OPC_SB, lo, base, 0);
398
- tcg_out_opc_imm(s, OPC_SB, TCG_TMP0, base, 1);
399
- break;
400
-
401
- case MO_32 | MO_BSWAP:
402
- tcg_out_bswap32(s, TCG_TMP3, lo, 0);
403
- lo = TCG_TMP3;
404
- /* fall through */
405
case MO_32:
406
tcg_out_opc_imm(s, sw1, lo, base, 0);
407
tcg_out_opc_imm(s, sw2, lo, base, 3);
408
break;
409
410
- case MO_64 | MO_BSWAP:
411
- if (TCG_TARGET_REG_BITS == 64) {
412
- tcg_out_bswap64(s, TCG_TMP3, lo);
413
- lo = TCG_TMP3;
414
- } else if (use_mips32r2_instructions) {
415
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? hi : lo);
416
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? lo : hi);
417
- tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16);
418
- tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16);
419
- hi = MIPS_BE ? TCG_TMP0 : TCG_TMP1;
420
- lo = MIPS_BE ? TCG_TMP1 : TCG_TMP0;
421
- } else {
422
- tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi, 0);
423
- tcg_out_opc_imm(s, sw1, TCG_TMP3, base, 0 + 0);
424
- tcg_out_opc_imm(s, sw2, TCG_TMP3, base, 0 + 3);
425
- tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo, 0);
426
- tcg_out_opc_imm(s, sw1, TCG_TMP3, base, 4 + 0);
427
- tcg_out_opc_imm(s, sw2, TCG_TMP3, base, 4 + 3);
428
- break;
429
- }
430
- /* fall through */
431
case MO_64:
432
if (TCG_TARGET_REG_BITS == 64) {
433
tcg_out_opc_imm(s, sd1, lo, base, 0);
434
--
435
2.34.1
436
437
New patch
1
Compare the address vs the tlb entry with sign-extended values.
2
This simplifies the page+alignment mask constant, and the
3
generation of the last byte address for the misaligned test.
1
4
5
Move the tlb addend load up, and the zero-extension down.
6
7
This frees up a register, which allows us to use TMP3 as the returned base
8
address register instead of A0, which we were using as a 5th temporary.
9
10
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
13
tcg/mips/tcg-target.c.inc | 38 ++++++++++++++++++--------------------
14
1 file changed, 18 insertions(+), 20 deletions(-)
15
16
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
17
index XXXXXXX..XXXXXXX 100644
18
--- a/tcg/mips/tcg-target.c.inc
19
+++ b/tcg/mips/tcg-target.c.inc
20
@@ -XXX,XX +XXX,XX @@ typedef enum {
21
ALIAS_PADDI = sizeof(void *) == 4 ? OPC_ADDIU : OPC_DADDIU,
22
ALIAS_TSRL = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
23
? OPC_SRL : OPC_DSRL,
24
+ ALIAS_TADDI = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
25
+ ? OPC_ADDIU : OPC_DADDIU,
26
} MIPSInsn;
27
28
/*
29
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
30
int add_off = offsetof(CPUTLBEntry, addend);
31
int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
32
: offsetof(CPUTLBEntry, addr_write);
33
- target_ulong tlb_mask;
34
35
ldst = new_ldst_label(s);
36
ldst->is_ld = is_ld;
37
ldst->oi = oi;
38
ldst->addrlo_reg = addrlo;
39
ldst->addrhi_reg = addrhi;
40
- base = TCG_REG_A0;
41
42
/* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
43
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
44
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
45
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
46
tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
47
} else {
48
- tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD
49
- : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW),
50
- TCG_TMP0, TCG_TMP3, cmp_off);
51
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_TMP0, TCG_TMP3, cmp_off);
52
}
53
54
- /* Zero extend a 32-bit guest address for a 64-bit host. */
55
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
56
- tcg_out_ext32u(s, base, addrlo);
57
- addrlo = base;
58
+ if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
59
+ /* Load the tlb addend for the fast path. */
60
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
61
}
62
63
/*
64
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
65
* For unaligned accesses, compare against the end of the access to
66
* verify that it does not cross a page boundary.
67
*/
68
- tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
69
- tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask);
70
- if (a_mask >= s_mask) {
71
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
72
- } else {
73
- tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrlo, s_mask - a_mask);
74
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, TARGET_PAGE_MASK | a_mask);
75
+ if (a_mask < s_mask) {
76
+ tcg_out_opc_imm(s, ALIAS_TADDI, TCG_TMP2, addrlo, s_mask - a_mask);
77
tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
78
+ } else {
79
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
80
}
81
82
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
83
- /* Load the tlb addend for the fast path. */
84
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
85
+ /* Zero extend a 32-bit guest address for a 64-bit host. */
86
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
87
+ tcg_out_ext32u(s, TCG_TMP2, addrlo);
88
+ addrlo = TCG_TMP2;
89
}
90
91
ldst->label_ptr[0] = s->code_ptr;
92
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
93
tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
94
95
/* Load the tlb addend for the fast path. */
96
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
97
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
98
99
ldst->label_ptr[1] = s->code_ptr;
100
tcg_out_opc_br(s, OPC_BNE, addrhi, TCG_TMP0);
101
}
102
103
/* delay slot */
104
- tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrlo);
105
+ base = TCG_TMP3;
106
+ tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP3, addrlo);
107
#else
108
if (a_mask && (use_mips32r6_instructions || a_bits != s_bits)) {
109
ldst = new_ldst_label(s);
110
--
111
2.34.1
112
113
New patch
1
The softmmu tlb uses TCG_REG_TMP[0-3], not any of the normally available
2
registers. Now that we handle overlap between inputs and helper arguments,
3
and have eliminated use of A0, we can allow any allocatable reg.
1
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/mips/tcg-target-con-set.h | 13 +++++--------
9
tcg/mips/tcg-target-con-str.h | 2 --
10
tcg/mips/tcg-target.c.inc | 30 ++++++++----------------------
11
3 files changed, 13 insertions(+), 32 deletions(-)
12
13
diff --git a/tcg/mips/tcg-target-con-set.h b/tcg/mips/tcg-target-con-set.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/mips/tcg-target-con-set.h
16
+++ b/tcg/mips/tcg-target-con-set.h
17
@@ -XXX,XX +XXX,XX @@
18
C_O0_I1(r)
19
C_O0_I2(rZ, r)
20
C_O0_I2(rZ, rZ)
21
-C_O0_I2(SZ, S)
22
-C_O0_I3(SZ, S, S)
23
-C_O0_I3(SZ, SZ, S)
24
+C_O0_I3(rZ, r, r)
25
+C_O0_I3(rZ, rZ, r)
26
C_O0_I4(rZ, rZ, rZ, rZ)
27
-C_O0_I4(SZ, SZ, S, S)
28
-C_O1_I1(r, L)
29
+C_O0_I4(rZ, rZ, r, r)
30
C_O1_I1(r, r)
31
C_O1_I2(r, 0, rZ)
32
-C_O1_I2(r, L, L)
33
+C_O1_I2(r, r, r)
34
C_O1_I2(r, r, ri)
35
C_O1_I2(r, r, rI)
36
C_O1_I2(r, r, rIK)
37
@@ -XXX,XX +XXX,XX @@ C_O1_I2(r, rZ, rN)
38
C_O1_I2(r, rZ, rZ)
39
C_O1_I4(r, rZ, rZ, rZ, 0)
40
C_O1_I4(r, rZ, rZ, rZ, rZ)
41
-C_O2_I1(r, r, L)
42
-C_O2_I2(r, r, L, L)
43
+C_O2_I1(r, r, r)
44
C_O2_I2(r, r, r, r)
45
C_O2_I4(r, r, rZ, rZ, rN, rN)
46
diff --git a/tcg/mips/tcg-target-con-str.h b/tcg/mips/tcg-target-con-str.h
47
index XXXXXXX..XXXXXXX 100644
48
--- a/tcg/mips/tcg-target-con-str.h
49
+++ b/tcg/mips/tcg-target-con-str.h
50
@@ -XXX,XX +XXX,XX @@
51
* REGS(letter, register_mask)
52
*/
53
REGS('r', ALL_GENERAL_REGS)
54
-REGS('L', ALL_QLOAD_REGS)
55
-REGS('S', ALL_QSTORE_REGS)
56
57
/*
58
* Define constraint letters for constants:
59
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
60
index XXXXXXX..XXXXXXX 100644
61
--- a/tcg/mips/tcg-target.c.inc
62
+++ b/tcg/mips/tcg-target.c.inc
63
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
64
#define TCG_CT_CONST_WSZ 0x2000 /* word size */
65
66
#define ALL_GENERAL_REGS 0xffffffffu
67
-#define NOA0_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_A0))
68
-
69
-#ifdef CONFIG_SOFTMMU
70
-#define ALL_QLOAD_REGS \
71
- (NOA0_REGS & ~((TCG_TARGET_REG_BITS < TARGET_LONG_BITS) << TCG_REG_A2))
72
-#define ALL_QSTORE_REGS \
73
- (NOA0_REGS & ~(TCG_TARGET_REG_BITS < TARGET_LONG_BITS \
74
- ? (1 << TCG_REG_A2) | (1 << TCG_REG_A3) \
75
- : (1 << TCG_REG_A1)))
76
-#else
77
-#define ALL_QLOAD_REGS NOA0_REGS
78
-#define ALL_QSTORE_REGS NOA0_REGS
79
-#endif
80
-
81
82
static bool is_p2m1(tcg_target_long val)
83
{
84
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
85
86
case INDEX_op_qemu_ld_i32:
87
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
88
- ? C_O1_I1(r, L) : C_O1_I2(r, L, L));
89
+ ? C_O1_I1(r, r) : C_O1_I2(r, r, r));
90
case INDEX_op_qemu_st_i32:
91
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
92
- ? C_O0_I2(SZ, S) : C_O0_I3(SZ, S, S));
93
+ ? C_O0_I2(rZ, r) : C_O0_I3(rZ, r, r));
94
case INDEX_op_qemu_ld_i64:
95
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
96
- : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, L)
97
- : C_O2_I2(r, r, L, L));
98
+ return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
99
+ : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
100
+ : C_O2_I2(r, r, r, r));
101
case INDEX_op_qemu_st_i64:
102
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(SZ, S)
103
- : TARGET_LONG_BITS == 32 ? C_O0_I3(SZ, SZ, S)
104
- : C_O0_I4(SZ, SZ, S, S));
105
+ return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r)
106
+ : TARGET_LONG_BITS == 32 ? C_O0_I3(rZ, rZ, r)
107
+ : C_O0_I4(rZ, rZ, r, r));
108
109
default:
110
g_assert_not_reached();
111
--
112
2.34.1
113
114
New patch
1
Allocate TCG_REG_TMP2. Use R0, TMP1, TMP2 instead of any of
2
the normally allocated registers for the tlb load.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/ppc/tcg-target.c.inc | 78 ++++++++++++++++++++++++----------------
9
1 file changed, 47 insertions(+), 31 deletions(-)
10
11
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.c.inc
14
+++ b/tcg/ppc/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@
16
#else
17
# define TCG_REG_TMP1 TCG_REG_R12
18
#endif
19
+#define TCG_REG_TMP2 TCG_REG_R11
20
21
#define TCG_VEC_TMP1 TCG_REG_V0
22
#define TCG_VEC_TMP2 TCG_REG_V1
23
@@ -XXX,XX +XXX,XX @@ static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
24
/*
25
* For the purposes of ppc32 sorting 4 input registers into 4 argument
26
* registers, there is an outside chance we would require 3 temps.
27
- * Because of constraints, no inputs are in r3, and env will not be
28
- * placed into r3 until after the sorting is done, and is thus free.
29
*/
30
static const TCGLdstHelperParam ldst_helper_param = {
31
.ra_gen = ldst_ra_gen,
32
.ntmp = 3,
33
- .tmp = { TCG_REG_TMP1, TCG_REG_R0, TCG_REG_R3 }
34
+ .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
35
};
36
37
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
38
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
39
/* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
40
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
41
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
42
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
43
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
44
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
45
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
46
47
/* Extract the page index, shifted into place for tlb index. */
48
if (TCG_TARGET_REG_BITS == 32) {
49
- tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
50
+ tcg_out_shri32(s, TCG_REG_R0, addrlo,
51
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
52
} else {
53
- tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
54
+ tcg_out_shri64(s, TCG_REG_R0, addrlo,
55
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
56
}
57
- tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
58
+ tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
59
60
- /* Load the TLB comparator. */
61
+ /* Load the (low part) TLB comparator into TMP2. */
62
if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
63
uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
64
? LWZUX : LDUX);
65
- tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
66
+ tcg_out32(s, lxu | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
67
} else {
68
- tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
69
+ tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
70
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
71
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
72
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
73
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2,
74
+ TCG_REG_TMP1, cmp_off + 4 * HOST_BIG_ENDIAN);
75
} else {
76
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
77
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off);
78
}
79
}
80
81
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
82
* Load the TLB addend for use on the fast path.
83
* Do this asap to minimize any load use delay.
84
*/
85
- h->base = TCG_REG_R3;
86
- tcg_out_ld(s, TCG_TYPE_PTR, h->base, TCG_REG_R3,
87
- offsetof(CPUTLBEntry, addend));
88
+ if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
89
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
90
+ offsetof(CPUTLBEntry, addend));
91
+ }
92
93
- /* Clear the non-page, non-alignment bits from the address */
94
+ /* Clear the non-page, non-alignment bits from the address in R0. */
95
if (TCG_TARGET_REG_BITS == 32) {
96
/*
97
* We don't support unaligned accesses on 32-bits.
98
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
99
if (TARGET_LONG_BITS == 32) {
100
tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
101
(32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
102
- /* Zero-extend the address for use in the final address. */
103
- tcg_out_ext32u(s, TCG_REG_R4, addrlo);
104
- addrlo = TCG_REG_R4;
105
} else if (a_bits == 0) {
106
tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
107
} else {
108
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
109
tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
110
}
111
}
112
- h->index = addrlo;
113
114
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
115
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
116
+ /* Low part comparison into cr7. */
117
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
118
0, 7, TCG_TYPE_I32);
119
- tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
120
+
121
+ /* Load the high part TLB comparator into TMP2. */
122
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
123
+ cmp_off + 4 * !HOST_BIG_ENDIAN);
124
+
125
+ /* Load addend, deferred for this case. */
126
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
127
+ offsetof(CPUTLBEntry, addend));
128
+
129
+ /* High part comparison into cr6. */
130
+ tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2, 0, 6, TCG_TYPE_I32);
131
+
132
+ /* Combine comparisons into cr7. */
133
tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
134
} else {
135
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
136
+ /* Full comparison into cr7. */
137
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
138
0, 7, TCG_TYPE_TL);
139
}
140
141
/* Load a pointer into the current opcode w/conditional branch-link. */
142
ldst->label_ptr[0] = s->code_ptr;
143
tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
144
+
145
+ h->base = TCG_REG_TMP1;
146
#else
147
if (a_bits) {
148
ldst = new_ldst_label(s);
149
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
150
}
151
152
h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
153
- h->index = addrlo;
154
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
155
- tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
156
- h->index = TCG_REG_TMP1;
157
- }
158
#endif
159
160
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
161
+ /* Zero-extend the guest address for use in the host address. */
162
+ tcg_out_ext32u(s, TCG_REG_R0, addrlo);
163
+ h->index = TCG_REG_R0;
164
+ } else {
165
+ h->index = addrlo;
166
+ }
167
+
168
return ldst;
169
}
170
171
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
172
#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
173
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
174
#endif
175
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
176
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
177
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
178
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
179
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
180
if (USE_REG_TB) {
181
--
182
2.34.1
183
184
New patch
1
The softmmu tlb uses TCG_REG_{TMP1,TMP2,R0}, not any of the normally
2
available registers. Now that we handle overlap between inputs and
3
helper arguments, we can allow any allocatable reg.
1
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/ppc/tcg-target-con-set.h | 11 ++++-------
10
tcg/ppc/tcg-target-con-str.h | 2 --
11
tcg/ppc/tcg-target.c.inc | 32 ++++++++++----------------------
12
3 files changed, 14 insertions(+), 31 deletions(-)
13
14
diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/ppc/tcg-target-con-set.h
17
+++ b/tcg/ppc/tcg-target-con-set.h
18
@@ -XXX,XX +XXX,XX @@
19
C_O0_I1(r)
20
C_O0_I2(r, r)
21
C_O0_I2(r, ri)
22
-C_O0_I2(S, S)
23
C_O0_I2(v, r)
24
-C_O0_I3(S, S, S)
25
+C_O0_I3(r, r, r)
26
C_O0_I4(r, r, ri, ri)
27
-C_O0_I4(S, S, S, S)
28
-C_O1_I1(r, L)
29
+C_O0_I4(r, r, r, r)
30
C_O1_I1(r, r)
31
C_O1_I1(v, r)
32
C_O1_I1(v, v)
33
C_O1_I1(v, vr)
34
C_O1_I2(r, 0, rZ)
35
-C_O1_I2(r, L, L)
36
C_O1_I2(r, rI, ri)
37
C_O1_I2(r, rI, rT)
38
C_O1_I2(r, r, r)
39
@@ -XXX,XX +XXX,XX @@ C_O1_I2(v, v, v)
40
C_O1_I3(v, v, v, v)
41
C_O1_I4(r, r, ri, rZ, rZ)
42
C_O1_I4(r, r, r, ri, ri)
43
-C_O2_I1(L, L, L)
44
-C_O2_I2(L, L, L, L)
45
+C_O2_I1(r, r, r)
46
+C_O2_I2(r, r, r, r)
47
C_O2_I4(r, r, rI, rZM, r, r)
48
C_O2_I4(r, r, r, r, rI, rZM)
49
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
50
index XXXXXXX..XXXXXXX 100644
51
--- a/tcg/ppc/tcg-target-con-str.h
52
+++ b/tcg/ppc/tcg-target-con-str.h
53
@@ -XXX,XX +XXX,XX @@ REGS('A', 1u << TCG_REG_R3)
54
REGS('B', 1u << TCG_REG_R4)
55
REGS('C', 1u << TCG_REG_R5)
56
REGS('D', 1u << TCG_REG_R6)
57
-REGS('L', ALL_QLOAD_REGS)
58
-REGS('S', ALL_QSTORE_REGS)
59
60
/*
61
* Define constraint letters for constants:
62
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
63
index XXXXXXX..XXXXXXX 100644
64
--- a/tcg/ppc/tcg-target.c.inc
65
+++ b/tcg/ppc/tcg-target.c.inc
66
@@ -XXX,XX +XXX,XX @@
67
#define ALL_GENERAL_REGS 0xffffffffu
68
#define ALL_VECTOR_REGS 0xffffffff00000000ull
69
70
-#ifdef CONFIG_SOFTMMU
71
-#define ALL_QLOAD_REGS \
72
- (ALL_GENERAL_REGS & \
73
- ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5)))
74
-#define ALL_QSTORE_REGS \
75
- (ALL_GENERAL_REGS & ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | \
76
- (1 << TCG_REG_R5) | (1 << TCG_REG_R6)))
77
-#else
78
-#define ALL_QLOAD_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_R3))
79
-#define ALL_QSTORE_REGS ALL_QLOAD_REGS
80
-#endif
81
-
82
TCGPowerISA have_isa;
83
static bool have_isel;
84
bool have_altivec;
85
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
86
87
case INDEX_op_qemu_ld_i32:
88
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
89
- ? C_O1_I1(r, L)
90
- : C_O1_I2(r, L, L));
91
+ ? C_O1_I1(r, r)
92
+ : C_O1_I2(r, r, r));
93
94
case INDEX_op_qemu_st_i32:
95
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
96
- ? C_O0_I2(S, S)
97
- : C_O0_I3(S, S, S));
98
+ ? C_O0_I2(r, r)
99
+ : C_O0_I3(r, r, r));
100
101
case INDEX_op_qemu_ld_i64:
102
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
103
- : TARGET_LONG_BITS == 32 ? C_O2_I1(L, L, L)
104
- : C_O2_I2(L, L, L, L));
105
+ return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
106
+ : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
107
+ : C_O2_I2(r, r, r, r));
108
109
case INDEX_op_qemu_st_i64:
110
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(S, S)
111
- : TARGET_LONG_BITS == 32 ? C_O0_I3(S, S, S)
112
- : C_O0_I4(S, S, S, S));
113
+ return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r)
114
+ : TARGET_LONG_BITS == 32 ? C_O0_I3(r, r, r)
115
+ : C_O0_I4(r, r, r, r));
116
117
case INDEX_op_add_vec:
118
case INDEX_op_sub_vec:
119
--
120
2.34.1
121
122
New patch
1
These constraints have not been used for quite some time.
1
2
3
Fixes: 77b73de67632 ("Use rem/div[u]_i32 drop div[u]2_i32")
4
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/ppc/tcg-target-con-str.h | 4 ----
10
1 file changed, 4 deletions(-)
11
12
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/ppc/tcg-target-con-str.h
15
+++ b/tcg/ppc/tcg-target-con-str.h
16
@@ -XXX,XX +XXX,XX @@
17
*/
18
REGS('r', ALL_GENERAL_REGS)
19
REGS('v', ALL_VECTOR_REGS)
20
-REGS('A', 1u << TCG_REG_R3)
21
-REGS('B', 1u << TCG_REG_R4)
22
-REGS('C', 1u << TCG_REG_R5)
23
-REGS('D', 1u << TCG_REG_R6)
24
25
/*
26
* Define constraint letters for constants:
27
--
28
2.34.1
29
30
New patch
1
Never used since its introduction.
1
2
3
Fixes: 3d582c6179c ("tcg-ppc64: Rearrange integer constant constraints")
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/ppc/tcg-target-con-str.h | 1 -
8
tcg/ppc/tcg-target.c.inc | 3 ---
9
2 files changed, 4 deletions(-)
10
11
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target-con-str.h
14
+++ b/tcg/ppc/tcg-target-con-str.h
15
@@ -XXX,XX +XXX,XX @@ REGS('v', ALL_VECTOR_REGS)
16
* CONST(letter, TCG_CT_CONST_* bit set)
17
*/
18
CONST('I', TCG_CT_CONST_S16)
19
-CONST('J', TCG_CT_CONST_U16)
20
CONST('M', TCG_CT_CONST_MONE)
21
CONST('T', TCG_CT_CONST_S32)
22
CONST('U', TCG_CT_CONST_U32)
23
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
24
index XXXXXXX..XXXXXXX 100644
25
--- a/tcg/ppc/tcg-target.c.inc
26
+++ b/tcg/ppc/tcg-target.c.inc
27
@@ -XXX,XX +XXX,XX @@
28
#define SZR (TCG_TARGET_REG_BITS / 8)
29
30
#define TCG_CT_CONST_S16 0x100
31
-#define TCG_CT_CONST_U16 0x200
32
#define TCG_CT_CONST_S32 0x400
33
#define TCG_CT_CONST_U32 0x800
34
#define TCG_CT_CONST_ZERO 0x1000
35
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
36
37
if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
38
return 1;
39
- } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
40
- return 1;
41
} else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
42
return 1;
43
} else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
44
--
45
2.34.1
46
47
New patch
1
The softmmu tlb uses TCG_REG_TMP[0-2], not any of the normally available
2
registers. Now that we handle overlap between inputs and helper arguments,
3
we can allow any allocatable reg.
1
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/riscv/tcg-target-con-set.h | 2 --
10
tcg/riscv/tcg-target-con-str.h | 1 -
11
tcg/riscv/tcg-target.c.inc | 16 +++-------------
12
3 files changed, 3 insertions(+), 16 deletions(-)
13
14
diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/riscv/tcg-target-con-set.h
17
+++ b/tcg/riscv/tcg-target-con-set.h
18
@@ -XXX,XX +XXX,XX @@
19
* tcg-target-con-str.h; the constraint combination is inclusive or.
20
*/
21
C_O0_I1(r)
22
-C_O0_I2(LZ, L)
23
C_O0_I2(rZ, r)
24
C_O0_I2(rZ, rZ)
25
-C_O1_I1(r, L)
26
C_O1_I1(r, r)
27
C_O1_I2(r, r, ri)
28
C_O1_I2(r, r, rI)
29
diff --git a/tcg/riscv/tcg-target-con-str.h b/tcg/riscv/tcg-target-con-str.h
30
index XXXXXXX..XXXXXXX 100644
31
--- a/tcg/riscv/tcg-target-con-str.h
32
+++ b/tcg/riscv/tcg-target-con-str.h
33
@@ -XXX,XX +XXX,XX @@
34
* REGS(letter, register_mask)
35
*/
36
REGS('r', ALL_GENERAL_REGS)
37
-REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
38
39
/*
40
* Define constraint letters for constants:
41
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
42
index XXXXXXX..XXXXXXX 100644
43
--- a/tcg/riscv/tcg-target.c.inc
44
+++ b/tcg/riscv/tcg-target.c.inc
45
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
46
#define TCG_CT_CONST_N12 0x400
47
#define TCG_CT_CONST_M12 0x800
48
49
-#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
50
-/*
51
- * For softmmu, we need to avoid conflicts with the first 5
52
- * argument registers to call the helper. Some of these are
53
- * also used for the tlb lookup.
54
- */
55
-#ifdef CONFIG_SOFTMMU
56
-#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_A0, 5)
57
-#else
58
-#define SOFTMMU_RESERVE_REGS 0
59
-#endif
60
+#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
61
62
#define sextreg sextract64
63
64
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
65
66
case INDEX_op_qemu_ld_i32:
67
case INDEX_op_qemu_ld_i64:
68
- return C_O1_I1(r, L);
69
+ return C_O1_I1(r, r);
70
case INDEX_op_qemu_st_i32:
71
case INDEX_op_qemu_st_i64:
72
- return C_O0_I2(LZ, L);
73
+ return C_O0_I2(rZ, r);
74
75
default:
76
g_assert_not_reached();
77
--
78
2.34.1
79
80
New patch
1
Rather than zero-extend the guest address into a register,
2
use an add instruction which zero-extends the second input.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/s390x/tcg-target.c.inc | 8 +++++---
8
1 file changed, 5 insertions(+), 3 deletions(-)
9
10
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/s390x/tcg-target.c.inc
13
+++ b/tcg/s390x/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
15
RRE_ALGR = 0xb90a,
16
RRE_ALCR = 0xb998,
17
RRE_ALCGR = 0xb988,
18
+ RRE_ALGFR = 0xb91a,
19
RRE_CGR = 0xb920,
20
RRE_CLGR = 0xb921,
21
RRE_DLGR = 0xb987,
22
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
23
tcg_out_insn(s, RXY, LG, h->index, TCG_REG_R2, TCG_REG_NONE,
24
offsetof(CPUTLBEntry, addend));
25
26
- h->base = addr_reg;
27
if (TARGET_LONG_BITS == 32) {
28
- tcg_out_ext32u(s, TCG_REG_R3, addr_reg);
29
- h->base = TCG_REG_R3;
30
+ tcg_out_insn(s, RRE, ALGFR, h->index, addr_reg);
31
+ h->base = TCG_REG_NONE;
32
+ } else {
33
+ h->base = addr_reg;
34
}
35
h->disp = 0;
36
#else
37
--
38
2.34.1
39
40
New patch
1
Adjust the softmmu tlb to use R0+R1, not any of the normally available
2
registers. Since we handle overlap between inputs and helper arguments,
3
we can allow any allocatable reg.
1
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/s390x/tcg-target-con-set.h | 2 --
9
tcg/s390x/tcg-target-con-str.h | 1 -
10
tcg/s390x/tcg-target.c.inc | 36 ++++++++++++----------------------
11
3 files changed, 12 insertions(+), 27 deletions(-)
12
13
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/s390x/tcg-target-con-set.h
16
+++ b/tcg/s390x/tcg-target-con-set.h
17
@@ -XXX,XX +XXX,XX @@
18
* tcg-target-con-str.h; the constraint combination is inclusive or.
19
*/
20
C_O0_I1(r)
21
-C_O0_I2(L, L)
22
C_O0_I2(r, r)
23
C_O0_I2(r, ri)
24
C_O0_I2(r, rA)
25
C_O0_I2(v, r)
26
-C_O1_I1(r, L)
27
C_O1_I1(r, r)
28
C_O1_I1(v, r)
29
C_O1_I1(v, v)
30
diff --git a/tcg/s390x/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h
31
index XXXXXXX..XXXXXXX 100644
32
--- a/tcg/s390x/tcg-target-con-str.h
33
+++ b/tcg/s390x/tcg-target-con-str.h
34
@@ -XXX,XX +XXX,XX @@
35
* REGS(letter, register_mask)
36
*/
37
REGS('r', ALL_GENERAL_REGS)
38
-REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
39
REGS('v', ALL_VECTOR_REGS)
40
REGS('o', 0xaaaa) /* odd numbered general regs */
41
42
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
43
index XXXXXXX..XXXXXXX 100644
44
--- a/tcg/s390x/tcg-target.c.inc
45
+++ b/tcg/s390x/tcg-target.c.inc
46
@@ -XXX,XX +XXX,XX @@
47
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16)
48
#define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32)
49
50
-/*
51
- * For softmmu, we need to avoid conflicts with the first 3
52
- * argument registers to perform the tlb lookup, and to call
53
- * the helper function.
54
- */
55
-#ifdef CONFIG_SOFTMMU
56
-#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
57
-#else
58
-#define SOFTMMU_RESERVE_REGS 0
59
-#endif
60
-
61
-
62
/* Several places within the instruction set 0 means "no register"
63
rather than TCG_REG_R0. */
64
#define TCG_REG_NONE 0
65
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
66
ldst->oi = oi;
67
ldst->addrlo_reg = addr_reg;
68
69
- tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
70
+ tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
71
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
72
73
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
74
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
75
- tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
76
- tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
77
+ tcg_out_insn(s, RXY, NG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, mask_off);
78
+ tcg_out_insn(s, RXY, AG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, table_off);
79
80
/*
81
* For aligned accesses, we check the first byte and include the alignment
82
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
83
a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
84
tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
85
if (a_off == 0) {
86
- tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
87
+ tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
88
} else {
89
- tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
90
- tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
91
+ tcg_out_insn(s, RX, LA, TCG_REG_R0, addr_reg, TCG_REG_NONE, a_off);
92
+ tgen_andi(s, TCG_TYPE_TL, TCG_REG_R0, tlb_mask);
93
}
94
95
if (is_ld) {
96
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
97
ofs = offsetof(CPUTLBEntry, addr_write);
98
}
99
if (TARGET_LONG_BITS == 32) {
100
- tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
101
+ tcg_out_insn(s, RX, C, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
102
} else {
103
- tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
104
+ tcg_out_insn(s, RXY, CG, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
105
}
106
107
tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
108
ldst->label_ptr[0] = s->code_ptr++;
109
110
- h->index = TCG_REG_R2;
111
- tcg_out_insn(s, RXY, LG, h->index, TCG_REG_R2, TCG_REG_NONE,
112
+ h->index = TCG_TMP0;
113
+ tcg_out_insn(s, RXY, LG, h->index, TCG_TMP0, TCG_REG_NONE,
114
offsetof(CPUTLBEntry, addend));
115
116
if (TARGET_LONG_BITS == 32) {
117
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
118
119
case INDEX_op_qemu_ld_i32:
120
case INDEX_op_qemu_ld_i64:
121
- return C_O1_I1(r, L);
122
+ return C_O1_I1(r, r);
123
case INDEX_op_qemu_st_i64:
124
case INDEX_op_qemu_st_i32:
125
- return C_O0_I2(L, L);
126
+ return C_O0_I2(r, r);
127
128
case INDEX_op_deposit_i32:
129
case INDEX_op_deposit_i64:
130
--
131
2.34.1
132
133
New patch
1
These are atomic operations, so mark them as requiring alignment.
1
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
target/mips/tcg/nanomips_translate.c.inc | 5 +++--
6
1 file changed, 3 insertions(+), 2 deletions(-)
7
8
diff --git a/target/mips/tcg/nanomips_translate.c.inc b/target/mips/tcg/nanomips_translate.c.inc
9
index XXXXXXX..XXXXXXX 100644
10
--- a/target/mips/tcg/nanomips_translate.c.inc
11
+++ b/target/mips/tcg/nanomips_translate.c.inc
12
@@ -XXX,XX +XXX,XX @@ static void gen_llwp(DisasContext *ctx, uint32_t base, int16_t offset,
13
TCGv tmp2 = tcg_temp_new();
14
15
gen_base_offset_addr(ctx, taddr, base, offset);
16
- tcg_gen_qemu_ld_i64(tval, taddr, ctx->mem_idx, MO_TEUQ);
17
+ tcg_gen_qemu_ld_i64(tval, taddr, ctx->mem_idx, MO_TEUQ | MO_ALIGN);
18
if (cpu_is_bigendian(ctx)) {
19
tcg_gen_extr_i64_tl(tmp2, tmp1, tval);
20
} else {
21
@@ -XXX,XX +XXX,XX @@ static void gen_scwp(DisasContext *ctx, uint32_t base, int16_t offset,
22
23
tcg_gen_ld_i64(llval, cpu_env, offsetof(CPUMIPSState, llval_wp));
24
tcg_gen_atomic_cmpxchg_i64(val, taddr, llval, tval,
25
- eva ? MIPS_HFLAG_UM : ctx->mem_idx, MO_64);
26
+ eva ? MIPS_HFLAG_UM : ctx->mem_idx,
27
+ MO_64 | MO_ALIGN);
28
if (reg1 != 0) {
29
tcg_gen_movi_tl(cpu_gpr[reg1], 1);
30
}
31
--
32
2.34.1
New patch
1
Memory operations that are not already aligned, or otherwise
2
marked up, require addition of ctx->default_tcg_memop_mask.
1
3
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
target/mips/tcg/mxu_translate.c | 3 ++-
7
target/mips/tcg/micromips_translate.c.inc | 24 ++++++++++++++--------
8
target/mips/tcg/mips16e_translate.c.inc | 18 ++++++++++------
9
target/mips/tcg/nanomips_translate.c.inc | 25 +++++++++++------------
10
4 files changed, 42 insertions(+), 28 deletions(-)
11
12
diff --git a/target/mips/tcg/mxu_translate.c b/target/mips/tcg/mxu_translate.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/mips/tcg/mxu_translate.c
15
+++ b/target/mips/tcg/mxu_translate.c
16
@@ -XXX,XX +XXX,XX @@ static void gen_mxu_s32ldd_s32lddr(DisasContext *ctx)
17
tcg_gen_ori_tl(t1, t1, 0xFFFFF000);
18
}
19
tcg_gen_add_tl(t1, t0, t1);
20
- tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, MO_TESL ^ (sel * MO_BSWAP));
21
+ tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, (MO_TESL ^ (sel * MO_BSWAP)) |
22
+ ctx->default_tcg_memop_mask);
23
24
gen_store_mxu_gpr(t1, XRa);
25
}
26
diff --git a/target/mips/tcg/micromips_translate.c.inc b/target/mips/tcg/micromips_translate.c.inc
27
index XXXXXXX..XXXXXXX 100644
28
--- a/target/mips/tcg/micromips_translate.c.inc
29
+++ b/target/mips/tcg/micromips_translate.c.inc
30
@@ -XXX,XX +XXX,XX @@ static void gen_ldst_pair(DisasContext *ctx, uint32_t opc, int rd,
31
gen_reserved_instruction(ctx);
32
return;
33
}
34
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL);
35
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL |
36
+ ctx->default_tcg_memop_mask);
37
gen_store_gpr(t1, rd);
38
tcg_gen_movi_tl(t1, 4);
39
gen_op_addr_add(ctx, t0, t0, t1);
40
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL);
41
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL |
42
+ ctx->default_tcg_memop_mask);
43
gen_store_gpr(t1, rd + 1);
44
break;
45
case SWP:
46
gen_load_gpr(t1, rd);
47
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
48
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
49
+ ctx->default_tcg_memop_mask);
50
tcg_gen_movi_tl(t1, 4);
51
gen_op_addr_add(ctx, t0, t0, t1);
52
gen_load_gpr(t1, rd + 1);
53
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
54
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
55
+ ctx->default_tcg_memop_mask);
56
break;
57
#ifdef TARGET_MIPS64
58
case LDP:
59
@@ -XXX,XX +XXX,XX @@ static void gen_ldst_pair(DisasContext *ctx, uint32_t opc, int rd,
60
gen_reserved_instruction(ctx);
61
return;
62
}
63
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
64
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
65
+ ctx->default_tcg_memop_mask);
66
gen_store_gpr(t1, rd);
67
tcg_gen_movi_tl(t1, 8);
68
gen_op_addr_add(ctx, t0, t0, t1);
69
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
70
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
71
+ ctx->default_tcg_memop_mask);
72
gen_store_gpr(t1, rd + 1);
73
break;
74
case SDP:
75
gen_load_gpr(t1, rd);
76
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
77
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
78
+ ctx->default_tcg_memop_mask);
79
tcg_gen_movi_tl(t1, 8);
80
gen_op_addr_add(ctx, t0, t0, t1);
81
gen_load_gpr(t1, rd + 1);
82
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
83
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
84
+ ctx->default_tcg_memop_mask);
85
break;
86
#endif
87
}
88
diff --git a/target/mips/tcg/mips16e_translate.c.inc b/target/mips/tcg/mips16e_translate.c.inc
89
index XXXXXXX..XXXXXXX 100644
90
--- a/target/mips/tcg/mips16e_translate.c.inc
91
+++ b/target/mips/tcg/mips16e_translate.c.inc
92
@@ -XXX,XX +XXX,XX @@ static void gen_mips16_save(DisasContext *ctx,
93
case 4:
94
gen_base_offset_addr(ctx, t0, 29, 12);
95
gen_load_gpr(t1, 7);
96
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
97
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
98
+ ctx->default_tcg_memop_mask);
99
/* Fall through */
100
case 3:
101
gen_base_offset_addr(ctx, t0, 29, 8);
102
gen_load_gpr(t1, 6);
103
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
104
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
105
+ ctx->default_tcg_memop_mask);
106
/* Fall through */
107
case 2:
108
gen_base_offset_addr(ctx, t0, 29, 4);
109
gen_load_gpr(t1, 5);
110
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
111
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
112
+ ctx->default_tcg_memop_mask);
113
/* Fall through */
114
case 1:
115
gen_base_offset_addr(ctx, t0, 29, 0);
116
gen_load_gpr(t1, 4);
117
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
118
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
119
+ ctx->default_tcg_memop_mask);
120
}
121
122
gen_load_gpr(t0, 29);
123
@@ -XXX,XX +XXX,XX @@ static void gen_mips16_save(DisasContext *ctx,
124
tcg_gen_movi_tl(t2, -4); \
125
gen_op_addr_add(ctx, t0, t0, t2); \
126
gen_load_gpr(t1, reg); \
127
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); \
128
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | \
129
+ ctx->default_tcg_memop_mask); \
130
} while (0)
131
132
if (do_ra) {
133
@@ -XXX,XX +XXX,XX @@ static void gen_mips16_restore(DisasContext *ctx,
134
#define DECR_AND_LOAD(reg) do { \
135
tcg_gen_movi_tl(t2, -4); \
136
gen_op_addr_add(ctx, t0, t0, t2); \
137
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL); \
138
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL | \
139
+ ctx->default_tcg_memop_mask); \
140
gen_store_gpr(t1, reg); \
141
} while (0)
142
143
diff --git a/target/mips/tcg/nanomips_translate.c.inc b/target/mips/tcg/nanomips_translate.c.inc
144
index XXXXXXX..XXXXXXX 100644
145
--- a/target/mips/tcg/nanomips_translate.c.inc
146
+++ b/target/mips/tcg/nanomips_translate.c.inc
147
@@ -XXX,XX +XXX,XX @@ static void gen_p_lsx(DisasContext *ctx, int rd, int rs, int rt)
148
149
switch (extract32(ctx->opcode, 7, 4)) {
150
case NM_LBX:
151
- tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
152
- MO_SB);
153
+ tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB);
154
gen_store_gpr(t0, rd);
155
break;
156
case NM_LHX:
157
/*case NM_LHXS:*/
158
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
159
- MO_TESW);
160
+ MO_TESW | ctx->default_tcg_memop_mask);
161
gen_store_gpr(t0, rd);
162
break;
163
case NM_LWX:
164
/*case NM_LWXS:*/
165
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
166
- MO_TESL);
167
+ MO_TESL | ctx->default_tcg_memop_mask);
168
gen_store_gpr(t0, rd);
169
break;
170
case NM_LBUX:
171
- tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
172
- MO_UB);
173
+ tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_UB);
174
gen_store_gpr(t0, rd);
175
break;
176
case NM_LHUX:
177
/*case NM_LHUXS:*/
178
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
179
- MO_TEUW);
180
+ MO_TEUW | ctx->default_tcg_memop_mask);
181
gen_store_gpr(t0, rd);
182
break;
183
case NM_SBX:
184
check_nms(ctx);
185
gen_load_gpr(t1, rd);
186
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
187
- MO_8);
188
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_8);
189
break;
190
case NM_SHX:
191
/*case NM_SHXS:*/
192
check_nms(ctx);
193
gen_load_gpr(t1, rd);
194
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
195
- MO_TEUW);
196
+ MO_TEUW | ctx->default_tcg_memop_mask);
197
break;
198
case NM_SWX:
199
/*case NM_SWXS:*/
200
check_nms(ctx);
201
gen_load_gpr(t1, rd);
202
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
203
- MO_TEUL);
204
+ MO_TEUL | ctx->default_tcg_memop_mask);
205
break;
206
case NM_LWC1X:
207
/*case NM_LWC1XS:*/
208
@@ -XXX,XX +XXX,XX @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx)
209
addr_off);
210
211
tcg_gen_movi_tl(t0, addr);
212
- tcg_gen_qemu_ld_tl(cpu_gpr[rt], t0, ctx->mem_idx, MO_TESL);
213
+ tcg_gen_qemu_ld_tl(cpu_gpr[rt], t0, ctx->mem_idx,
214
+ MO_TESL | ctx->default_tcg_memop_mask);
215
}
216
break;
217
case NM_SWPC48:
218
@@ -XXX,XX +XXX,XX @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx)
219
tcg_gen_movi_tl(t0, addr);
220
gen_load_gpr(t1, rt);
221
222
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
223
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
224
+ MO_TEUL | ctx->default_tcg_memop_mask);
225
}
226
break;
227
default:
228
--
229
2.34.1
New patch
1
The opposite of MO_UNALN is MO_ALIGN.
1
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
target/mips/tcg/nanomips_translate.c.inc | 2 +-
7
1 file changed, 1 insertion(+), 1 deletion(-)
8
9
diff --git a/target/mips/tcg/nanomips_translate.c.inc b/target/mips/tcg/nanomips_translate.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/target/mips/tcg/nanomips_translate.c.inc
12
+++ b/target/mips/tcg/nanomips_translate.c.inc
13
@@ -XXX,XX +XXX,XX @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx)
14
TCGv va = tcg_temp_new();
15
TCGv t1 = tcg_temp_new();
16
MemOp memop = (extract32(ctx->opcode, 8, 3)) ==
17
- NM_P_LS_UAWM ? MO_UNALN : 0;
18
+ NM_P_LS_UAWM ? MO_UNALN : MO_ALIGN;
19
20
count = (count == 0) ? 8 : count;
21
while (counter != count) {
22
--
23
2.34.1
24
25
New patch
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
---
3
configs/targets/mips-linux-user.mak | 1 -
4
configs/targets/mips-softmmu.mak | 1 -
5
configs/targets/mips64-linux-user.mak | 1 -
6
configs/targets/mips64-softmmu.mak | 1 -
7
configs/targets/mips64el-linux-user.mak | 1 -
8
configs/targets/mips64el-softmmu.mak | 1 -
9
configs/targets/mipsel-linux-user.mak | 1 -
10
configs/targets/mipsel-softmmu.mak | 1 -
11
configs/targets/mipsn32-linux-user.mak | 1 -
12
configs/targets/mipsn32el-linux-user.mak | 1 -
13
10 files changed, 10 deletions(-)
1
14
15
diff --git a/configs/targets/mips-linux-user.mak b/configs/targets/mips-linux-user.mak
16
index XXXXXXX..XXXXXXX 100644
17
--- a/configs/targets/mips-linux-user.mak
18
+++ b/configs/targets/mips-linux-user.mak
19
@@ -XXX,XX +XXX,XX @@ TARGET_ARCH=mips
20
TARGET_ABI_MIPSO32=y
21
TARGET_SYSTBL_ABI=o32
22
TARGET_SYSTBL=syscall_o32.tbl
23
-TARGET_ALIGNED_ONLY=y
24
TARGET_BIG_ENDIAN=y
25
diff --git a/configs/targets/mips-softmmu.mak b/configs/targets/mips-softmmu.mak
26
index XXXXXXX..XXXXXXX 100644
27
--- a/configs/targets/mips-softmmu.mak
28
+++ b/configs/targets/mips-softmmu.mak
29
@@ -XXX,XX +XXX,XX @@
30
TARGET_ARCH=mips
31
-TARGET_ALIGNED_ONLY=y
32
TARGET_BIG_ENDIAN=y
33
TARGET_SUPPORTS_MTTCG=y
34
diff --git a/configs/targets/mips64-linux-user.mak b/configs/targets/mips64-linux-user.mak
35
index XXXXXXX..XXXXXXX 100644
36
--- a/configs/targets/mips64-linux-user.mak
37
+++ b/configs/targets/mips64-linux-user.mak
38
@@ -XXX,XX +XXX,XX @@ TARGET_ABI_MIPSN64=y
39
TARGET_BASE_ARCH=mips
40
TARGET_SYSTBL_ABI=n64
41
TARGET_SYSTBL=syscall_n64.tbl
42
-TARGET_ALIGNED_ONLY=y
43
TARGET_BIG_ENDIAN=y
44
diff --git a/configs/targets/mips64-softmmu.mak b/configs/targets/mips64-softmmu.mak
45
index XXXXXXX..XXXXXXX 100644
46
--- a/configs/targets/mips64-softmmu.mak
47
+++ b/configs/targets/mips64-softmmu.mak
48
@@ -XXX,XX +XXX,XX @@
49
TARGET_ARCH=mips64
50
TARGET_BASE_ARCH=mips
51
-TARGET_ALIGNED_ONLY=y
52
TARGET_BIG_ENDIAN=y
53
diff --git a/configs/targets/mips64el-linux-user.mak b/configs/targets/mips64el-linux-user.mak
54
index XXXXXXX..XXXXXXX 100644
55
--- a/configs/targets/mips64el-linux-user.mak
56
+++ b/configs/targets/mips64el-linux-user.mak
57
@@ -XXX,XX +XXX,XX @@ TARGET_ABI_MIPSN64=y
58
TARGET_BASE_ARCH=mips
59
TARGET_SYSTBL_ABI=n64
60
TARGET_SYSTBL=syscall_n64.tbl
61
-TARGET_ALIGNED_ONLY=y
62
diff --git a/configs/targets/mips64el-softmmu.mak b/configs/targets/mips64el-softmmu.mak
63
index XXXXXXX..XXXXXXX 100644
64
--- a/configs/targets/mips64el-softmmu.mak
65
+++ b/configs/targets/mips64el-softmmu.mak
66
@@ -XXX,XX +XXX,XX @@
67
TARGET_ARCH=mips64
68
TARGET_BASE_ARCH=mips
69
-TARGET_ALIGNED_ONLY=y
70
TARGET_NEED_FDT=y
71
diff --git a/configs/targets/mipsel-linux-user.mak b/configs/targets/mipsel-linux-user.mak
72
index XXXXXXX..XXXXXXX 100644
73
--- a/configs/targets/mipsel-linux-user.mak
74
+++ b/configs/targets/mipsel-linux-user.mak
75
@@ -XXX,XX +XXX,XX @@ TARGET_ARCH=mips
76
TARGET_ABI_MIPSO32=y
77
TARGET_SYSTBL_ABI=o32
78
TARGET_SYSTBL=syscall_o32.tbl
79
-TARGET_ALIGNED_ONLY=y
80
diff --git a/configs/targets/mipsel-softmmu.mak b/configs/targets/mipsel-softmmu.mak
81
index XXXXXXX..XXXXXXX 100644
82
--- a/configs/targets/mipsel-softmmu.mak
83
+++ b/configs/targets/mipsel-softmmu.mak
84
@@ -XXX,XX +XXX,XX @@
85
TARGET_ARCH=mips
86
-TARGET_ALIGNED_ONLY=y
87
TARGET_SUPPORTS_MTTCG=y
88
diff --git a/configs/targets/mipsn32-linux-user.mak b/configs/targets/mipsn32-linux-user.mak
89
index XXXXXXX..XXXXXXX 100644
90
--- a/configs/targets/mipsn32-linux-user.mak
91
+++ b/configs/targets/mipsn32-linux-user.mak
92
@@ -XXX,XX +XXX,XX @@ TARGET_ABI32=y
93
TARGET_BASE_ARCH=mips
94
TARGET_SYSTBL_ABI=n32
95
TARGET_SYSTBL=syscall_n32.tbl
96
-TARGET_ALIGNED_ONLY=y
97
TARGET_BIG_ENDIAN=y
98
diff --git a/configs/targets/mipsn32el-linux-user.mak b/configs/targets/mipsn32el-linux-user.mak
99
index XXXXXXX..XXXXXXX 100644
100
--- a/configs/targets/mipsn32el-linux-user.mak
101
+++ b/configs/targets/mipsn32el-linux-user.mak
102
@@ -XXX,XX +XXX,XX @@ TARGET_ABI32=y
103
TARGET_BASE_ARCH=mips
104
TARGET_SYSTBL_ABI=n32
105
TARGET_SYSTBL=syscall_n32.tbl
106
-TARGET_ALIGNED_ONLY=y
107
--
108
2.34.1
New patch
1
In gen_ldx/gen_stx (the only two locations for memory operations),
2
mark the operation as either aligned (softmmu) or unaligned
3
(user-only, as if emulated by the kernel).
1
4
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
configs/targets/nios2-softmmu.mak | 1 -
9
target/nios2/translate.c | 10 ++++++++++
10
2 files changed, 10 insertions(+), 1 deletion(-)
11
12
diff --git a/configs/targets/nios2-softmmu.mak b/configs/targets/nios2-softmmu.mak
13
index XXXXXXX..XXXXXXX 100644
14
--- a/configs/targets/nios2-softmmu.mak
15
+++ b/configs/targets/nios2-softmmu.mak
16
@@ -XXX,XX +XXX,XX @@
17
TARGET_ARCH=nios2
18
-TARGET_ALIGNED_ONLY=y
19
TARGET_NEED_FDT=y
20
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/target/nios2/translate.c
23
+++ b/target/nios2/translate.c
24
@@ -XXX,XX +XXX,XX @@ static void gen_ldx(DisasContext *dc, uint32_t code, uint32_t flags)
25
TCGv data = dest_gpr(dc, instr.b);
26
27
tcg_gen_addi_tl(addr, load_gpr(dc, instr.a), instr.imm16.s);
28
+#ifdef CONFIG_USER_ONLY
29
+ flags |= MO_UNALN;
30
+#else
31
+ flags |= MO_ALIGN;
32
+#endif
33
tcg_gen_qemu_ld_tl(data, addr, dc->mem_idx, flags);
34
}
35
36
@@ -XXX,XX +XXX,XX @@ static void gen_stx(DisasContext *dc, uint32_t code, uint32_t flags)
37
38
TCGv addr = tcg_temp_new();
39
tcg_gen_addi_tl(addr, load_gpr(dc, instr.a), instr.imm16.s);
40
+#ifdef CONFIG_USER_ONLY
41
+ flags |= MO_UNALN;
42
+#else
43
+ flags |= MO_ALIGN;
44
+#endif
45
tcg_gen_qemu_st_tl(val, addr, dc->mem_idx, flags);
46
}
47
48
--
49
2.34.1
50
51
New patch
1
Mark all memory operations that are not already marked with UNALIGN as aligned.
1
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
target/sh4/translate.c | 102 ++++++++++++++++++++++++++---------------
7
1 file changed, 66 insertions(+), 36 deletions(-)
8
9
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/target/sh4/translate.c
12
+++ b/target/sh4/translate.c
13
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
14
case 0x9000:        /* mov.w @(disp,PC),Rn */
15
    {
16
TCGv addr = tcg_constant_i32(ctx->base.pc_next + 4 + B7_0 * 2);
17
- tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESW);
18
+ tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx,
19
+ MO_TESW | MO_ALIGN);
20
    }
21
    return;
22
case 0xd000:        /* mov.l @(disp,PC),Rn */
23
    {
24
TCGv addr = tcg_constant_i32((ctx->base.pc_next + 4 + B7_0 * 4) & ~3);
25
- tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESL);
26
+ tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx,
27
+ MO_TESL | MO_ALIGN);
28
    }
29
    return;
30
case 0x7000:        /* add #imm,Rn */
31
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
32
    {
33
     TCGv arg0, arg1;
34
     arg0 = tcg_temp_new();
35
- tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESL);
36
+ tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx,
37
+ MO_TESL | MO_ALIGN);
38
     arg1 = tcg_temp_new();
39
- tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESL);
40
+ tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx,
41
+ MO_TESL | MO_ALIGN);
42
gen_helper_macl(cpu_env, arg0, arg1);
43
     tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
44
     tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
45
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
46
    {
47
     TCGv arg0, arg1;
48
     arg0 = tcg_temp_new();
49
- tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESL);
50
+ tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx,
51
+ MO_TESL | MO_ALIGN);
52
     arg1 = tcg_temp_new();
53
- tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESL);
54
+ tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx,
55
+ MO_TESL | MO_ALIGN);
56
gen_helper_macw(cpu_env, arg0, arg1);
57
     tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 2);
58
     tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 2);
59
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
60
if (ctx->tbflags & FPSCR_SZ) {
61
TCGv_i64 fp = tcg_temp_new_i64();
62
gen_load_fpr64(ctx, fp, XHACK(B7_4));
63
- tcg_gen_qemu_st_i64(fp, REG(B11_8), ctx->memidx, MO_TEUQ);
64
+ tcg_gen_qemu_st_i64(fp, REG(B11_8), ctx->memidx,
65
+ MO_TEUQ | MO_ALIGN);
66
    } else {
67
- tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx, MO_TEUL);
68
+ tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx,
69
+ MO_TEUL | MO_ALIGN);
70
    }
71
    return;
72
case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */
73
    CHECK_FPU_ENABLED
74
if (ctx->tbflags & FPSCR_SZ) {
75
TCGv_i64 fp = tcg_temp_new_i64();
76
- tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEUQ);
77
+ tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx,
78
+ MO_TEUQ | MO_ALIGN);
79
gen_store_fpr64(ctx, fp, XHACK(B11_8));
80
    } else {
81
- tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
82
+ tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx,
83
+ MO_TEUL | MO_ALIGN);
84
    }
85
    return;
86
case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */
87
    CHECK_FPU_ENABLED
88
if (ctx->tbflags & FPSCR_SZ) {
89
TCGv_i64 fp = tcg_temp_new_i64();
90
- tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEUQ);
91
+ tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx,
92
+ MO_TEUQ | MO_ALIGN);
93
gen_store_fpr64(ctx, fp, XHACK(B11_8));
94
tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 8);
95
    } else {
96
- tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
97
+ tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx,
98
+ MO_TEUL | MO_ALIGN);
99
     tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
100
    }
101
    return;
102
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
103
TCGv_i64 fp = tcg_temp_new_i64();
104
gen_load_fpr64(ctx, fp, XHACK(B7_4));
105
tcg_gen_subi_i32(addr, REG(B11_8), 8);
106
- tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEUQ);
107
+ tcg_gen_qemu_st_i64(fp, addr, ctx->memidx,
108
+ MO_TEUQ | MO_ALIGN);
109
} else {
110
tcg_gen_subi_i32(addr, REG(B11_8), 4);
111
- tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
112
+ tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx,
113
+ MO_TEUL | MO_ALIGN);
114
}
115
tcg_gen_mov_i32(REG(B11_8), addr);
116
}
117
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
118
     tcg_gen_add_i32(addr, REG(B7_4), REG(0));
119
if (ctx->tbflags & FPSCR_SZ) {
120
TCGv_i64 fp = tcg_temp_new_i64();
121
- tcg_gen_qemu_ld_i64(fp, addr, ctx->memidx, MO_TEUQ);
122
+ tcg_gen_qemu_ld_i64(fp, addr, ctx->memidx,
123
+ MO_TEUQ | MO_ALIGN);
124
gen_store_fpr64(ctx, fp, XHACK(B11_8));
125
     } else {
126
- tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx, MO_TEUL);
127
+ tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx,
128
+ MO_TEUL | MO_ALIGN);
129
     }
130
    }
131
    return;
132
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
133
if (ctx->tbflags & FPSCR_SZ) {
134
TCGv_i64 fp = tcg_temp_new_i64();
135
gen_load_fpr64(ctx, fp, XHACK(B7_4));
136
- tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEUQ);
137
+ tcg_gen_qemu_st_i64(fp, addr, ctx->memidx,
138
+ MO_TEUQ | MO_ALIGN);
139
     } else {
140
- tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
141
+ tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx,
142
+ MO_TEUL | MO_ALIGN);
143
     }
144
    }
145
    return;
146
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
147
    {
148
     TCGv addr = tcg_temp_new();
149
     tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2);
150
- tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESW);
151
+ tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESW | MO_ALIGN);
152
    }
153
    return;
154
case 0xc600:        /* mov.l @(disp,GBR),R0 */
155
    {
156
     TCGv addr = tcg_temp_new();
157
     tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4);
158
- tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESL);
159
+ tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESL | MO_ALIGN);
160
    }
161
    return;
162
case 0xc000:        /* mov.b R0,@(disp,GBR) */
163
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
164
    {
165
     TCGv addr = tcg_temp_new();
166
     tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2);
167
- tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUW);
168
+ tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUW | MO_ALIGN);
169
    }
170
    return;
171
case 0xc200:        /* mov.l R0,@(disp,GBR) */
172
    {
173
     TCGv addr = tcg_temp_new();
174
     tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4);
175
- tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUL);
176
+ tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUL | MO_ALIGN);
177
    }
178
    return;
179
case 0x8000:        /* mov.b R0,@(disp,Rn) */
180
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
181
    return;
182
case 0x4087:        /* ldc.l @Rm+,Rn_BANK */
183
    CHECK_PRIVILEGED
184
- tcg_gen_qemu_ld_i32(ALTREG(B6_4), REG(B11_8), ctx->memidx, MO_TESL);
185
+ tcg_gen_qemu_ld_i32(ALTREG(B6_4), REG(B11_8), ctx->memidx,
186
+ MO_TESL | MO_ALIGN);
187
    tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
188
    return;
189
case 0x0082:        /* stc Rm_BANK,Rn */
190
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
191
    {
192
     TCGv addr = tcg_temp_new();
193
     tcg_gen_subi_i32(addr, REG(B11_8), 4);
194
- tcg_gen_qemu_st_i32(ALTREG(B6_4), addr, ctx->memidx, MO_TEUL);
195
+ tcg_gen_qemu_st_i32(ALTREG(B6_4), addr, ctx->memidx,
196
+ MO_TEUL | MO_ALIGN);
197
     tcg_gen_mov_i32(REG(B11_8), addr);
198
    }
199
    return;
200
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
201
    CHECK_PRIVILEGED
202
    {
203
     TCGv val = tcg_temp_new();
204
- tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TESL);
205
+ tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx,
206
+ MO_TESL | MO_ALIGN);
207
tcg_gen_andi_i32(val, val, 0x700083f3);
208
gen_write_sr(val);
209
     tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
210
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
211
TCGv val = tcg_temp_new();
212
     tcg_gen_subi_i32(addr, REG(B11_8), 4);
213
gen_read_sr(val);
214
- tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL);
215
+ tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL | MO_ALIGN);
216
     tcg_gen_mov_i32(REG(B11_8), addr);
217
    }
218
    return;
219
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
220
return;                            \
221
case ldpnum:                            \
222
prechk                             \
223
- tcg_gen_qemu_ld_i32(cpu_##reg, REG(B11_8), ctx->memidx, MO_TESL); \
224
+ tcg_gen_qemu_ld_i32(cpu_##reg, REG(B11_8), ctx->memidx, \
225
+ MO_TESL | MO_ALIGN); \
226
tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);        \
227
return;
228
#define ST(reg,stnum,stpnum,prechk)        \
229
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
230
{                                \
231
    TCGv addr = tcg_temp_new();                \
232
    tcg_gen_subi_i32(addr, REG(B11_8), 4);            \
233
- tcg_gen_qemu_st_i32(cpu_##reg, addr, ctx->memidx, MO_TEUL); \
234
+ tcg_gen_qemu_st_i32(cpu_##reg, addr, ctx->memidx, \
235
+ MO_TEUL | MO_ALIGN); \
236
    tcg_gen_mov_i32(REG(B11_8), addr);            \
237
}                                \
238
return;
239
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
240
    CHECK_FPU_ENABLED
241
    {
242
     TCGv addr = tcg_temp_new();
243
- tcg_gen_qemu_ld_i32(addr, REG(B11_8), ctx->memidx, MO_TESL);
244
+ tcg_gen_qemu_ld_i32(addr, REG(B11_8), ctx->memidx,
245
+ MO_TESL | MO_ALIGN);
246
     tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
247
gen_helper_ld_fpscr(cpu_env, addr);
248
ctx->base.is_jmp = DISAS_STOP;
249
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
250
     tcg_gen_andi_i32(val, cpu_fpscr, 0x003fffff);
251
     addr = tcg_temp_new();
252
     tcg_gen_subi_i32(addr, REG(B11_8), 4);
253
- tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL);
254
+ tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL | MO_ALIGN);
255
     tcg_gen_mov_i32(REG(B11_8), addr);
256
    }
257
    return;
258
case 0x00c3:        /* movca.l R0,@Rm */
259
{
260
TCGv val = tcg_temp_new();
261
- tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TEUL);
262
+ tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx,
263
+ MO_TEUL | MO_ALIGN);
264
gen_helper_movcal(cpu_env, REG(B11_8), val);
265
- tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
266
+ tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx,
267
+ MO_TEUL | MO_ALIGN);
268
}
269
ctx->has_movcal = 1;
270
    return;
271
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
272
cpu_lock_addr, fail);
273
tmp = tcg_temp_new();
274
tcg_gen_atomic_cmpxchg_i32(tmp, REG(B11_8), cpu_lock_value,
275
- REG(0), ctx->memidx, MO_TEUL);
276
+ REG(0), ctx->memidx,
277
+ MO_TEUL | MO_ALIGN);
278
tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, tmp, cpu_lock_value);
279
} else {
280
tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_lock_addr, -1, fail);
281
- tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
282
+ tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx,
283
+ MO_TEUL | MO_ALIGN);
284
tcg_gen_movi_i32(cpu_sr_t, 1);
285
}
286
tcg_gen_br(done);
287
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
288
if ((tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
289
TCGv tmp = tcg_temp_new();
290
tcg_gen_mov_i32(tmp, REG(B11_8));
291
- tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
292
+ tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
293
+ MO_TESL | MO_ALIGN);
294
tcg_gen_mov_i32(cpu_lock_value, REG(0));
295
tcg_gen_mov_i32(cpu_lock_addr, tmp);
296
} else {
297
- tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
298
+ tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
299
+ MO_TESL | MO_ALIGN);
300
tcg_gen_movi_i32(cpu_lock_addr, 0);
301
}
302
return;
303
--
304
2.34.1
305
306
New patch
1
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
configs/targets/sh4-linux-user.mak | 1 -
5
configs/targets/sh4-softmmu.mak | 1 -
6
configs/targets/sh4eb-linux-user.mak | 1 -
7
configs/targets/sh4eb-softmmu.mak | 1 -
8
4 files changed, 4 deletions(-)
1
9
10
diff --git a/configs/targets/sh4-linux-user.mak b/configs/targets/sh4-linux-user.mak
11
index XXXXXXX..XXXXXXX 100644
12
--- a/configs/targets/sh4-linux-user.mak
13
+++ b/configs/targets/sh4-linux-user.mak
14
@@ -XXX,XX +XXX,XX @@
15
TARGET_ARCH=sh4
16
TARGET_SYSTBL_ABI=common
17
TARGET_SYSTBL=syscall.tbl
18
-TARGET_ALIGNED_ONLY=y
19
TARGET_HAS_BFLT=y
20
diff --git a/configs/targets/sh4-softmmu.mak b/configs/targets/sh4-softmmu.mak
21
index XXXXXXX..XXXXXXX 100644
22
--- a/configs/targets/sh4-softmmu.mak
23
+++ b/configs/targets/sh4-softmmu.mak
24
@@ -1,2 +1 @@
25
TARGET_ARCH=sh4
26
-TARGET_ALIGNED_ONLY=y
27
diff --git a/configs/targets/sh4eb-linux-user.mak b/configs/targets/sh4eb-linux-user.mak
28
index XXXXXXX..XXXXXXX 100644
29
--- a/configs/targets/sh4eb-linux-user.mak
30
+++ b/configs/targets/sh4eb-linux-user.mak
31
@@ -XXX,XX +XXX,XX @@
32
TARGET_ARCH=sh4
33
TARGET_SYSTBL_ABI=common
34
TARGET_SYSTBL=syscall.tbl
35
-TARGET_ALIGNED_ONLY=y
36
TARGET_BIG_ENDIAN=y
37
TARGET_HAS_BFLT=y
38
diff --git a/configs/targets/sh4eb-softmmu.mak b/configs/targets/sh4eb-softmmu.mak
39
index XXXXXXX..XXXXXXX 100644
40
--- a/configs/targets/sh4eb-softmmu.mak
41
+++ b/configs/targets/sh4eb-softmmu.mak
42
@@ -XXX,XX +XXX,XX @@
43
TARGET_ARCH=sh4
44
-TARGET_ALIGNED_ONLY=y
45
TARGET_BIG_ENDIAN=y
46
--
47
2.34.1
48
49
New patch
1
All uses have now been expunged.
1
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
include/exec/memop.h | 13 ++-----------
7
include/exec/poison.h | 1 -
8
tcg/tcg.c | 5 -----
9
3 files changed, 2 insertions(+), 17 deletions(-)
10
11
diff --git a/include/exec/memop.h b/include/exec/memop.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/include/exec/memop.h
14
+++ b/include/exec/memop.h
15
@@ -XXX,XX +XXX,XX @@ typedef enum MemOp {
16
* MO_UNALN accesses are never checked for alignment.
17
* MO_ALIGN accesses will result in a call to the CPU's
18
* do_unaligned_access hook if the guest address is not aligned.
19
- * The default depends on whether the target CPU defines
20
- * TARGET_ALIGNED_ONLY.
21
*
22
* Some architectures (e.g. ARMv8) need the address which is aligned
23
* to a size more than the size of the memory access.
24
@@ -XXX,XX +XXX,XX @@ typedef enum MemOp {
25
*/
26
MO_ASHIFT = 5,
27
MO_AMASK = 0x7 << MO_ASHIFT,
28
-#ifdef NEED_CPU_H
29
-#ifdef TARGET_ALIGNED_ONLY
30
- MO_ALIGN = 0,
31
- MO_UNALN = MO_AMASK,
32
-#else
33
- MO_ALIGN = MO_AMASK,
34
- MO_UNALN = 0,
35
-#endif
36
-#endif
37
+ MO_UNALN = 0,
38
MO_ALIGN_2 = 1 << MO_ASHIFT,
39
MO_ALIGN_4 = 2 << MO_ASHIFT,
40
MO_ALIGN_8 = 3 << MO_ASHIFT,
41
MO_ALIGN_16 = 4 << MO_ASHIFT,
42
MO_ALIGN_32 = 5 << MO_ASHIFT,
43
MO_ALIGN_64 = 6 << MO_ASHIFT,
44
+ MO_ALIGN = MO_AMASK,
45
46
/* Combinations of the above, for ease of use. */
47
MO_UB = MO_8,
48
diff --git a/include/exec/poison.h b/include/exec/poison.h
49
index XXXXXXX..XXXXXXX 100644
50
--- a/include/exec/poison.h
51
+++ b/include/exec/poison.h
52
@@ -XXX,XX +XXX,XX @@
53
#pragma GCC poison TARGET_TRICORE
54
#pragma GCC poison TARGET_XTENSA
55
56
-#pragma GCC poison TARGET_ALIGNED_ONLY
57
#pragma GCC poison TARGET_HAS_BFLT
58
#pragma GCC poison TARGET_NAME
59
#pragma GCC poison TARGET_SUPPORTS_MTTCG
60
diff --git a/tcg/tcg.c b/tcg/tcg.c
61
index XXXXXXX..XXXXXXX 100644
62
--- a/tcg/tcg.c
63
+++ b/tcg/tcg.c
64
@@ -XXX,XX +XXX,XX @@ static const char * const ldst_name[] =
65
};
66
67
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
68
-#ifdef TARGET_ALIGNED_ONLY
69
[MO_UNALN >> MO_ASHIFT] = "un+",
70
- [MO_ALIGN >> MO_ASHIFT] = "",
71
-#else
72
- [MO_UNALN >> MO_ASHIFT] = "",
73
[MO_ALIGN >> MO_ASHIFT] = "al+",
74
-#endif
75
[MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
76
[MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
77
[MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
78
--
79
2.34.1
80
81
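For orientation, a standalone sketch (a demo file, not QEMU source) of what the MemOp change above means for users of the flags: with MO_UNALN now 0 and MO_ALIGN now equal to MO_AMASK, the requested alignment can be decoded along the lines of get_alignment_bits(). The constants are copied from the hunk above; the size field is simplified to two bits.

    /* memop_align_demo.c: illustrative only, builds with any C99 compiler. */
    #include <stdio.h>

    enum {
        MO_8 = 0, MO_16 = 1, MO_32 = 2, MO_64 = 3,  /* log2 of the access size */
        MO_SIZE = 3,                                /* simplified size mask */
        MO_ASHIFT = 5,
        MO_AMASK = 0x7 << MO_ASHIFT,
        MO_UNALN = 0,                               /* never checked */
        MO_ALIGN_2 = 1 << MO_ASHIFT,                /* explicit 2-byte alignment */
        MO_ALIGN_4 = 2 << MO_ASHIFT,
        MO_ALIGN_8 = 3 << MO_ASHIFT,
        MO_ALIGN = MO_AMASK,                        /* natural alignment */
    };

    /* Return log2 of the required alignment: 0 for unaligned, the access
     * size for MO_ALIGN, or the explicit MO_ALIGN_N request. */
    static unsigned alignment_bits(unsigned memop)
    {
        unsigned a = memop & MO_AMASK;

        if (a == MO_UNALN) {
            return 0;
        } else if (a == MO_ALIGN) {
            return memop & MO_SIZE;
        }
        return a >> MO_ASHIFT;
    }

    int main(void)
    {
        printf("%u %u %u\n",
               alignment_bits(MO_32),                /* 0: unaligned */
               alignment_bits(MO_32 | MO_ALIGN),     /* 2: natural, 4 bytes */
               alignment_bits(MO_64 | MO_ALIGN_2));  /* 1: explicit 2 bytes */
        return 0;
    }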
New patch
1
Like cpu_in_exclusive_context, but also true if
2
there is no other cpu against which we could race.
1
3
4
Use it in tb_flush as a direct replacement.
5
Use it in cpu_loop_exit_atomic to ensure that there
6
is no loop against cpu_exec_step_atomic.
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
13
accel/tcg/internal.h | 9 +++++++++
14
accel/tcg/cpu-exec-common.c | 3 +++
15
accel/tcg/tb-maint.c | 2 +-
16
3 files changed, 13 insertions(+), 1 deletion(-)
17
18
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/accel/tcg/internal.h
21
+++ b/accel/tcg/internal.h
22
@@ -XXX,XX +XXX,XX @@ static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
23
}
24
}
25
26
+/*
27
+ * Return true if CS is not running in parallel with other cpus, either
28
+ * because there are no other cpus or we are within an exclusive context.
29
+ */
30
+static inline bool cpu_in_serial_context(CPUState *cs)
31
+{
32
+ return !(cs->tcg_cflags & CF_PARALLEL) || cpu_in_exclusive_context(cs);
33
+}
34
+
35
extern int64_t max_delay;
36
extern int64_t max_advance;
37
38
diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/accel/tcg/cpu-exec-common.c
41
+++ b/accel/tcg/cpu-exec-common.c
42
@@ -XXX,XX +XXX,XX @@
43
#include "sysemu/tcg.h"
44
#include "exec/exec-all.h"
45
#include "qemu/plugin.h"
46
+#include "internal.h"
47
48
bool tcg_allowed;
49
50
@@ -XXX,XX +XXX,XX @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
51
52
void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc)
53
{
54
+ /* Prevent looping if already executing in a serial context. */
55
+ g_assert(!cpu_in_serial_context(cpu));
56
cpu->exception_index = EXCP_ATOMIC;
57
cpu_loop_exit_restore(cpu, pc);
58
}
59
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
60
index XXXXXXX..XXXXXXX 100644
61
--- a/accel/tcg/tb-maint.c
62
+++ b/accel/tcg/tb-maint.c
63
@@ -XXX,XX +XXX,XX @@ void tb_flush(CPUState *cpu)
64
if (tcg_enabled()) {
65
unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count);
66
67
- if (cpu_in_exclusive_context(cpu)) {
68
+ if (cpu_in_serial_context(cpu)) {
69
do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
70
} else {
71
async_safe_run_on_cpu(cpu, do_tb_flush,
72
--
73
2.34.1
74
75
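For orientation, a tiny standalone model (demo types and names, not QEMU source) of the predicate introduced above: a vCPU counts as being in a serial context when it was not created with CF_PARALLEL, or when it currently holds the exclusive section, so nothing else can race with it.

    /* serial_ctx_demo.c: illustrative only; prints "1 0 1". */
    #include <stdbool.h>
    #include <stdio.h>

    #define DEMO_CF_PARALLEL 0x8u        /* stand-in for QEMU's CF_PARALLEL bit */

    typedef struct DemoCPU {
        unsigned tcg_cflags;
        int exclusive_depth;             /* > 0 while inside start/end_exclusive */
    } DemoCPU;

    static bool demo_in_exclusive_context(const DemoCPU *cs)
    {
        return cs->exclusive_depth > 0;
    }

    static bool demo_in_serial_context(const DemoCPU *cs)
    {
        return !(cs->tcg_cflags & DEMO_CF_PARALLEL) || demo_in_exclusive_context(cs);
    }

    int main(void)
    {
        DemoCPU single = { .tcg_cflags = 0 };
        DemoCPU mttcg  = { .tcg_cflags = DEMO_CF_PARALLEL };
        DemoCPU excl   = { .tcg_cflags = DEMO_CF_PARALLEL, .exclusive_depth = 1 };

        printf("%d %d %d\n",
               demo_in_serial_context(&single),   /* 1: no other cpu to race */
               demo_in_serial_context(&mttcg),    /* 0: parallel, not exclusive */
               demo_in_serial_context(&excl));    /* 1: inside exclusive section */
        return 0;
    }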
1
The tcg_out_ldst helper will handle out-of-range offsets.
We haven't actually encountered any, since we haven't run
across the assert within tcg_out_op_rrs, but an out-of-range
offset would not be impossible in future.

Fixes: 65089889183 ("tcg/tci: Change encoding to uint32_t units")
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tci/tcg-target.c.inc | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)

Instead of playing with offsetof in various places, use
MMUAccessType to index an array. This is easily defined
instead of the previous dummy padding array in the union.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/cpu-defs.h | 7 ++-
include/exec/cpu_ldst.h | 26 ++++++++--
accel/tcg/cputlb.c | 104 +++++++++++++---------------------------
3 files changed, 59 insertions(+), 78 deletions(-)
12
14
13
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
15
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
14
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/tci/tcg-target.c.inc
17
--- a/include/exec/cpu-defs.h
16
+++ b/tcg/tci/tcg-target.c.inc
18
+++ b/include/exec/cpu-defs.h
17
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
19
@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBEntry {
18
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg val, TCGReg base,
20
use the corresponding iotlb value. */
19
intptr_t offset)
21
uintptr_t addend;
20
{
22
};
21
- stack_bounds_check(base, offset);
23
- /* padding to get a power of two size */
22
switch (type) {
24
- uint8_t dummy[1 << CPU_TLB_ENTRY_BITS];
23
case TCG_TYPE_I32:
25
+ /*
24
- tcg_out_op_rrs(s, INDEX_op_st_i32, val, base, offset);
26
+ * Padding to get a power of two size, as well as index
25
+ tcg_out_ldst(s, INDEX_op_st_i32, val, base, offset);
27
+ * access to addr_{read,write,code}.
26
break;
28
+ */
27
#if TCG_TARGET_REG_BITS == 64
29
+ target_ulong addr_idx[(1 << CPU_TLB_ENTRY_BITS) / TARGET_LONG_SIZE];
28
case TCG_TYPE_I64:
30
};
29
- tcg_out_op_rrs(s, INDEX_op_st_i64, val, base, offset);
31
} CPUTLBEntry;
30
+ tcg_out_ldst(s, INDEX_op_st_i64, val, base, offset);
32
31
break;
33
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
32
#endif
34
index XXXXXXX..XXXXXXX 100644
33
default:
35
--- a/include/exec/cpu_ldst.h
36
+++ b/include/exec/cpu_ldst.h
37
@@ -XXX,XX +XXX,XX @@ static inline void clear_helper_retaddr(void)
38
/* Needed for TCG_OVERSIZED_GUEST */
39
#include "tcg/tcg.h"
40
41
+static inline target_ulong tlb_read_idx(const CPUTLBEntry *entry,
42
+ MMUAccessType access_type)
43
+{
44
+ /* Do not rearrange the CPUTLBEntry structure members. */
45
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_read) !=
46
+ MMU_DATA_LOAD * TARGET_LONG_SIZE);
47
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_write) !=
48
+ MMU_DATA_STORE * TARGET_LONG_SIZE);
49
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_code) !=
50
+ MMU_INST_FETCH * TARGET_LONG_SIZE);
51
+
52
+ const target_ulong *ptr = &entry->addr_idx[access_type];
53
+#if TCG_OVERSIZED_GUEST
54
+ return *ptr;
55
+#else
56
+ /* ofs might correspond to .addr_write, so use qatomic_read */
57
+ return qatomic_read(ptr);
58
+#endif
59
+}
60
+
61
static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry)
62
{
63
-#if TCG_OVERSIZED_GUEST
64
- return entry->addr_write;
65
-#else
66
- return qatomic_read(&entry->addr_write);
67
-#endif
68
+ return tlb_read_idx(entry, MMU_DATA_STORE);
69
}
70
71
/* Find the TLB index corresponding to the mmu_idx + address pair. */
72
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
73
index XXXXXXX..XXXXXXX 100644
74
--- a/accel/tcg/cputlb.c
75
+++ b/accel/tcg/cputlb.c
76
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
77
}
78
}
79
80
-static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
81
-{
82
-#if TCG_OVERSIZED_GUEST
83
- return *(target_ulong *)((uintptr_t)entry + ofs);
84
-#else
85
- /* ofs might correspond to .addr_write, so use qatomic_read */
86
- return qatomic_read((target_ulong *)((uintptr_t)entry + ofs));
87
-#endif
88
-}
89
-
90
/* Return true if ADDR is present in the victim tlb, and has been copied
91
back to the main tlb. */
92
static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
93
- size_t elt_ofs, target_ulong page)
94
+ MMUAccessType access_type, target_ulong page)
95
{
96
size_t vidx;
97
98
assert_cpu_is_self(env_cpu(env));
99
for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
100
CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
101
- target_ulong cmp;
102
-
103
- /* elt_ofs might correspond to .addr_write, so use qatomic_read */
104
-#if TCG_OVERSIZED_GUEST
105
- cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
106
-#else
107
- cmp = qatomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
108
-#endif
109
+ target_ulong cmp = tlb_read_idx(vtlb, access_type);
110
111
if (cmp == page) {
112
/* Found entry in victim tlb, swap tlb and iotlb. */
113
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
114
return false;
115
}
116
117
-/* Macro to call the above, with local variables from the use context. */
118
-#define VICTIM_TLB_HIT(TY, ADDR) \
119
- victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
120
- (ADDR) & TARGET_PAGE_MASK)
121
-
122
static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
123
CPUTLBEntryFull *full, uintptr_t retaddr)
124
{
125
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
126
{
127
uintptr_t index = tlb_index(env, mmu_idx, addr);
128
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
129
- target_ulong tlb_addr, page_addr;
130
- size_t elt_ofs;
131
- int flags;
132
+ target_ulong tlb_addr = tlb_read_idx(entry, access_type);
133
+ target_ulong page_addr = addr & TARGET_PAGE_MASK;
134
+ int flags = TLB_FLAGS_MASK;
135
136
- switch (access_type) {
137
- case MMU_DATA_LOAD:
138
- elt_ofs = offsetof(CPUTLBEntry, addr_read);
139
- break;
140
- case MMU_DATA_STORE:
141
- elt_ofs = offsetof(CPUTLBEntry, addr_write);
142
- break;
143
- case MMU_INST_FETCH:
144
- elt_ofs = offsetof(CPUTLBEntry, addr_code);
145
- break;
146
- default:
147
- g_assert_not_reached();
148
- }
149
- tlb_addr = tlb_read_ofs(entry, elt_ofs);
150
-
151
- flags = TLB_FLAGS_MASK;
152
- page_addr = addr & TARGET_PAGE_MASK;
153
if (!tlb_hit_page(tlb_addr, page_addr)) {
154
- if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
155
+ if (!victim_tlb_hit(env, mmu_idx, index, access_type, page_addr)) {
156
CPUState *cs = env_cpu(env);
157
158
if (!cs->cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
159
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
160
*/
161
flags &= ~TLB_INVALID_MASK;
162
}
163
- tlb_addr = tlb_read_ofs(entry, elt_ofs);
164
+ tlb_addr = tlb_read_idx(entry, access_type);
165
}
166
flags &= tlb_addr;
167
168
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
169
if (prot & PAGE_WRITE) {
170
tlb_addr = tlb_addr_write(tlbe);
171
if (!tlb_hit(tlb_addr, addr)) {
172
- if (!VICTIM_TLB_HIT(addr_write, addr)) {
173
+ if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE,
174
+ addr & TARGET_PAGE_MASK)) {
175
tlb_fill(env_cpu(env), addr, size,
176
MMU_DATA_STORE, mmu_idx, retaddr);
177
index = tlb_index(env, mmu_idx, addr);
178
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
179
} else /* if (prot & PAGE_READ) */ {
180
tlb_addr = tlbe->addr_read;
181
if (!tlb_hit(tlb_addr, addr)) {
182
- if (!VICTIM_TLB_HIT(addr_read, addr)) {
183
+ if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_LOAD,
184
+ addr & TARGET_PAGE_MASK)) {
185
tlb_fill(env_cpu(env), addr, size,
186
MMU_DATA_LOAD, mmu_idx, retaddr);
187
index = tlb_index(env, mmu_idx, addr);
188
@@ -XXX,XX +XXX,XX @@ load_memop(const void *haddr, MemOp op)
189
190
static inline uint64_t QEMU_ALWAYS_INLINE
191
load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
192
- uintptr_t retaddr, MemOp op, bool code_read,
193
+ uintptr_t retaddr, MemOp op, MMUAccessType access_type,
194
FullLoadHelper *full_load)
195
{
196
- const size_t tlb_off = code_read ?
197
- offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
198
- const MMUAccessType access_type =
199
- code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
200
const unsigned a_bits = get_alignment_bits(get_memop(oi));
201
const size_t size = memop_size(op);
202
uintptr_t mmu_idx = get_mmuidx(oi);
203
@@ -XXX,XX +XXX,XX @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
204
205
index = tlb_index(env, mmu_idx, addr);
206
entry = tlb_entry(env, mmu_idx, addr);
207
- tlb_addr = code_read ? entry->addr_code : entry->addr_read;
208
+ tlb_addr = tlb_read_idx(entry, access_type);
209
210
/* If the TLB entry is for a different page, reload and try again. */
211
if (!tlb_hit(tlb_addr, addr)) {
212
- if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
213
+ if (!victim_tlb_hit(env, mmu_idx, index, access_type,
214
addr & TARGET_PAGE_MASK)) {
215
tlb_fill(env_cpu(env), addr, size,
216
access_type, mmu_idx, retaddr);
217
index = tlb_index(env, mmu_idx, addr);
218
entry = tlb_entry(env, mmu_idx, addr);
219
}
220
- tlb_addr = code_read ? entry->addr_code : entry->addr_read;
221
+ tlb_addr = tlb_read_idx(entry, access_type);
222
tlb_addr &= ~TLB_INVALID_MASK;
223
}
224
225
@@ -XXX,XX +XXX,XX @@ static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
226
MemOpIdx oi, uintptr_t retaddr)
227
{
228
validate_memop(oi, MO_UB);
229
- return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
230
+ return load_helper(env, addr, oi, retaddr, MO_UB, MMU_DATA_LOAD,
231
+ full_ldub_mmu);
232
}
233
234
tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
235
@@ -XXX,XX +XXX,XX @@ static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
236
MemOpIdx oi, uintptr_t retaddr)
237
{
238
validate_memop(oi, MO_LEUW);
239
- return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
240
+ return load_helper(env, addr, oi, retaddr, MO_LEUW, MMU_DATA_LOAD,
241
full_le_lduw_mmu);
242
}
243
244
@@ -XXX,XX +XXX,XX @@ static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
245
MemOpIdx oi, uintptr_t retaddr)
246
{
247
validate_memop(oi, MO_BEUW);
248
- return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
249
+ return load_helper(env, addr, oi, retaddr, MO_BEUW, MMU_DATA_LOAD,
250
full_be_lduw_mmu);
251
}
252
253
@@ -XXX,XX +XXX,XX @@ static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
254
MemOpIdx oi, uintptr_t retaddr)
255
{
256
validate_memop(oi, MO_LEUL);
257
- return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
258
+ return load_helper(env, addr, oi, retaddr, MO_LEUL, MMU_DATA_LOAD,
259
full_le_ldul_mmu);
260
}
261
262
@@ -XXX,XX +XXX,XX @@ static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
263
MemOpIdx oi, uintptr_t retaddr)
264
{
265
validate_memop(oi, MO_BEUL);
266
- return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
267
+ return load_helper(env, addr, oi, retaddr, MO_BEUL, MMU_DATA_LOAD,
268
full_be_ldul_mmu);
269
}
270
271
@@ -XXX,XX +XXX,XX @@ uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
272
MemOpIdx oi, uintptr_t retaddr)
273
{
274
validate_memop(oi, MO_LEUQ);
275
- return load_helper(env, addr, oi, retaddr, MO_LEUQ, false,
276
+ return load_helper(env, addr, oi, retaddr, MO_LEUQ, MMU_DATA_LOAD,
277
helper_le_ldq_mmu);
278
}
279
280
@@ -XXX,XX +XXX,XX @@ uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
281
MemOpIdx oi, uintptr_t retaddr)
282
{
283
validate_memop(oi, MO_BEUQ);
284
- return load_helper(env, addr, oi, retaddr, MO_BEUQ, false,
285
+ return load_helper(env, addr, oi, retaddr, MO_BEUQ, MMU_DATA_LOAD,
286
helper_be_ldq_mmu);
287
}
288
289
@@ -XXX,XX +XXX,XX @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
290
uintptr_t retaddr, size_t size, uintptr_t mmu_idx,
291
bool big_endian)
292
{
293
- const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
294
uintptr_t index, index2;
295
CPUTLBEntry *entry, *entry2;
296
target_ulong page1, page2, tlb_addr, tlb_addr2;
297
@@ -XXX,XX +XXX,XX @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
298
299
tlb_addr2 = tlb_addr_write(entry2);
300
if (page1 != page2 && !tlb_hit_page(tlb_addr2, page2)) {
301
- if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
302
+ if (!victim_tlb_hit(env, mmu_idx, index2, MMU_DATA_STORE, page2)) {
303
tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
304
mmu_idx, retaddr);
305
index2 = tlb_index(env, mmu_idx, page2);
306
@@ -XXX,XX +XXX,XX @@ static inline void QEMU_ALWAYS_INLINE
307
store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
308
MemOpIdx oi, uintptr_t retaddr, MemOp op)
309
{
310
- const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
311
const unsigned a_bits = get_alignment_bits(get_memop(oi));
312
const size_t size = memop_size(op);
313
uintptr_t mmu_idx = get_mmuidx(oi);
314
@@ -XXX,XX +XXX,XX @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
315
316
/* If the TLB entry is for a different page, reload and try again. */
317
if (!tlb_hit(tlb_addr, addr)) {
318
- if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
319
+ if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE,
320
addr & TARGET_PAGE_MASK)) {
321
tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
322
mmu_idx, retaddr);
323
@@ -XXX,XX +XXX,XX @@ void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
324
static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
325
MemOpIdx oi, uintptr_t retaddr)
326
{
327
- return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
328
+ return load_helper(env, addr, oi, retaddr, MO_8,
329
+ MMU_INST_FETCH, full_ldub_code);
330
}
331
332
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
333
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
334
static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
335
MemOpIdx oi, uintptr_t retaddr)
336
{
337
- return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
338
+ return load_helper(env, addr, oi, retaddr, MO_TEUW,
339
+ MMU_INST_FETCH, full_lduw_code);
340
}
341
342
uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
343
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
344
static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
345
MemOpIdx oi, uintptr_t retaddr)
346
{
347
- return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
348
+ return load_helper(env, addr, oi, retaddr, MO_TEUL,
349
+ MMU_INST_FETCH, full_ldl_code);
350
}
351
352
uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
353
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
354
static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
355
MemOpIdx oi, uintptr_t retaddr)
356
{
357
- return load_helper(env, addr, oi, retaddr, MO_TEUQ, true, full_ldq_code);
358
+ return load_helper(env, addr, oi, retaddr, MO_TEUQ,
359
+ MMU_INST_FETCH, full_ldq_code);
360
}
361
362
uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
34
--
363
--
35
2.25.1
364
2.34.1
36
365
37
366
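For orientation, a standalone sketch (demo types, not QEMU source; the atomics of the real code are omitted) of the union-overlay technique that the new tlb_read_idx() relies on: the named comparators are overlaid with an array inside the union, so an access type can index them directly instead of going through offsetof arithmetic, with build-time asserts pinning the layout.

    /* tlb_overlay_demo.c: illustrative only, C11. */
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef enum { DEMO_LOAD = 0, DEMO_STORE = 1, DEMO_FETCH = 2 } DemoAccessType;

    typedef union DemoTLBEntry {
        struct {
            uint64_t addr_read;
            uint64_t addr_write;
            uint64_t addr_code;
            uint64_t addend;
        };
        uint64_t addr_idx[4];            /* overlays the named fields above */
    } DemoTLBEntry;

    /* The overlay is only valid if the members sit where the indices expect. */
    _Static_assert(offsetof(DemoTLBEntry, addr_read)  == DEMO_LOAD  * sizeof(uint64_t), "");
    _Static_assert(offsetof(DemoTLBEntry, addr_write) == DEMO_STORE * sizeof(uint64_t), "");
    _Static_assert(offsetof(DemoTLBEntry, addr_code)  == DEMO_FETCH * sizeof(uint64_t), "");

    static uint64_t demo_read_idx(const DemoTLBEntry *e, DemoAccessType type)
    {
        return e->addr_idx[type];
    }

    int main(void)
    {
        DemoTLBEntry e = { .addr_read = 0x1000, .addr_write = 0x2000, .addr_code = 0x3000 };

        printf("%llx %llx %llx\n",
               (unsigned long long)demo_read_idx(&e, DEMO_LOAD),
               (unsigned long long)demo_read_idx(&e, DEMO_STORE),
               (unsigned long long)demo_read_idx(&e, DEMO_FETCH));
        return 0;
    }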
1
The dh_alias redirect is intended to handle TCG types as distinguished
from C types. TCG does not distinguish signed int from unsigned int,
because they are the same size. However, we need to retain this
distinction for dh_typecode, lest we fail to extend abi types properly
for the host call parameters.

This bug was detected when running the 'arm' emulator on an s390
system. The s390 uses TCG_TARGET_EXTEND_ARGS which triggers code
in tcg_gen_callN to extend 32 bit values to 64 bits; the incorrect
sign data in the typemask for each argument caused the values to be
extended as unsigned values.

This simple program exhibits the problem:

    static volatile int num = -9;
    static volatile int den = -5;
    int main(void)
    {
        int quo = num / den;
        printf("num %d den %d quo %d\n", num, den, quo);
        exit(0);
    }

When run on the broken qemu, this results in:

    num -9 den -5 quo 0

The correct result is:

    num -9 den -5 quo 1

Fixes: 7319d83a735 ("tcg: Combine dh_is_64bit and dh_is_signed to dh_typecode")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/876
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reported-by: Christian Ehrhardt <christian.ehrhardt@canonical.com>
Tested-by: Christian Ehrhardt <christian.ehrhardt@canonical.com>
Tested-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Instead of trying to unify all operations on uint64_t, pull out
mmu_lookup() to perform the basic tlb hit and resolution.
Create individual functions to handle access by size.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
39
---
8
---
40
include/exec/helper-head.h | 19 ++++++++++---------
9
accel/tcg/cputlb.c | 645 +++++++++++++++++++++++++++++----------------
41
target/hppa/helper.h | 2 ++
10
1 file changed, 424 insertions(+), 221 deletions(-)
42
target/i386/ops_sse_header.h | 3 +++
43
target/m68k/helper.h | 1 +
44
target/ppc/helper.h | 3 +++
45
5 files changed, 19 insertions(+), 9 deletions(-)
46
11
47
diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h
12
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
48
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
49
--- a/include/exec/helper-head.h
14
--- a/accel/tcg/cputlb.c
50
+++ b/include/exec/helper-head.h
15
+++ b/accel/tcg/cputlb.c
51
@@ -XXX,XX +XXX,XX @@
16
@@ -XXX,XX +XXX,XX @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
52
# ifdef TARGET_LONG_BITS
17
53
# if TARGET_LONG_BITS == 32
54
# define dh_alias_tl i32
55
+# define dh_typecode_tl dh_typecode_i32
56
# else
57
# define dh_alias_tl i64
58
+# define dh_typecode_tl dh_typecode_i64
59
# endif
60
# endif
61
-# define dh_alias_env ptr
62
# define dh_ctype_tl target_ulong
63
+# define dh_alias_env ptr
64
# define dh_ctype_env CPUArchState *
65
+# define dh_typecode_env dh_typecode_ptr
66
#endif
18
#endif
67
19
68
/* We can't use glue() here because it falls foul of C preprocessor
20
+/*
69
@@ -XXX,XX +XXX,XX @@
21
+ * Probe for a load/store operation.
70
#define dh_typecode_i64 4
22
+ * Return the host address and into @flags.
71
#define dh_typecode_s64 5
23
+ */
72
#define dh_typecode_ptr 6
24
+
73
-#define dh_typecode(t) glue(dh_typecode_, dh_alias(t))
25
+typedef struct MMULookupPageData {
74
+#define dh_typecode_int dh_typecode_s32
26
+ CPUTLBEntryFull *full;
75
+#define dh_typecode_f16 dh_typecode_i32
27
+ void *haddr;
76
+#define dh_typecode_f32 dh_typecode_i32
28
+ target_ulong addr;
77
+#define dh_typecode_f64 dh_typecode_i64
29
+ int flags;
78
+#define dh_typecode_cptr dh_typecode_ptr
30
+ int size;
79
+#define dh_typecode(t) dh_typecode_##t
31
+} MMULookupPageData;
80
32
+
81
#define dh_callflag_i32 0
33
+typedef struct MMULookupLocals {
82
-#define dh_callflag_s32 0
34
+ MMULookupPageData page[2];
83
-#define dh_callflag_int 0
35
+ MemOp memop;
84
#define dh_callflag_i64 0
36
+ int mmu_idx;
85
-#define dh_callflag_s64 0
37
+} MMULookupLocals;
86
-#define dh_callflag_f16 0
38
+
87
-#define dh_callflag_f32 0
39
+/**
88
-#define dh_callflag_f64 0
40
+ * mmu_lookup1: translate one page
89
#define dh_callflag_ptr 0
41
+ * @env: cpu context
90
-#define dh_callflag_cptr dh_callflag_ptr
42
+ * @data: lookup parameters
91
#define dh_callflag_void 0
43
+ * @mmu_idx: virtual address context
92
#define dh_callflag_noreturn TCG_CALL_NO_RETURN
44
+ * @access_type: load/store/code
93
#define dh_callflag(t) glue(dh_callflag_, dh_alias(t))
45
+ * @ra: return address into tcg generated code, or 0
94
diff --git a/target/hppa/helper.h b/target/hppa/helper.h
46
+ *
95
index XXXXXXX..XXXXXXX 100644
47
+ * Resolve the translation for the one page at @data.addr, filling in
96
--- a/target/hppa/helper.h
48
+ * the rest of @data with the results. If the translation fails,
97
+++ b/target/hppa/helper.h
49
+ * tlb_fill will longjmp out. Return true if the softmmu tlb for
98
@@ -XXX,XX +XXX,XX @@
50
+ * @mmu_idx may have resized.
99
#if TARGET_REGISTER_BITS == 64
51
+ */
100
# define dh_alias_tr i64
52
+static bool mmu_lookup1(CPUArchState *env, MMULookupPageData *data,
101
+# define dh_typecode_tr dh_typecode_i64
53
+ int mmu_idx, MMUAccessType access_type, uintptr_t ra)
102
#else
54
+{
103
# define dh_alias_tr i32
55
+ target_ulong addr = data->addr;
104
+# define dh_typecode_tr dh_typecode_i32
56
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
105
#endif
57
+ CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
106
#define dh_ctype_tr target_ureg
58
+ target_ulong tlb_addr = tlb_read_idx(entry, access_type);
107
59
+ bool maybe_resized = false;
108
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
60
+
109
index XXXXXXX..XXXXXXX 100644
61
+ /* If the TLB entry is for a different page, reload and try again. */
110
--- a/target/i386/ops_sse_header.h
62
+ if (!tlb_hit(tlb_addr, addr)) {
111
+++ b/target/i386/ops_sse_header.h
63
+ if (!victim_tlb_hit(env, mmu_idx, index, access_type,
112
@@ -XXX,XX +XXX,XX @@
64
+ addr & TARGET_PAGE_MASK)) {
113
#define dh_ctype_Reg Reg *
65
+ tlb_fill(env_cpu(env), addr, data->size, access_type, mmu_idx, ra);
114
#define dh_ctype_ZMMReg ZMMReg *
66
+ maybe_resized = true;
115
#define dh_ctype_MMXReg MMXReg *
67
+ index = tlb_index(env, mmu_idx, addr);
116
+#define dh_typecode_Reg dh_typecode_ptr
68
+ entry = tlb_entry(env, mmu_idx, addr);
117
+#define dh_typecode_ZMMReg dh_typecode_ptr
69
+ }
118
+#define dh_typecode_MMXReg dh_typecode_ptr
70
+ tlb_addr = tlb_read_idx(entry, access_type) & ~TLB_INVALID_MASK;
119
71
+ }
120
DEF_HELPER_3(glue(psrlw, SUFFIX), void, env, Reg, Reg)
72
+
121
DEF_HELPER_3(glue(psraw, SUFFIX), void, env, Reg, Reg)
73
+ data->flags = tlb_addr & TLB_FLAGS_MASK;
122
diff --git a/target/m68k/helper.h b/target/m68k/helper.h
74
+ data->full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
123
index XXXXXXX..XXXXXXX 100644
75
+ /* Compute haddr speculatively; depending on flags it might be invalid. */
124
--- a/target/m68k/helper.h
76
+ data->haddr = (void *)((uintptr_t)addr + entry->addend);
125
+++ b/target/m68k/helper.h
77
+
126
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(cas2l_parallel, void, env, i32, i32, i32)
78
+ return maybe_resized;
127
79
+}
128
#define dh_alias_fp ptr
80
+
129
#define dh_ctype_fp FPReg *
81
+/**
130
+#define dh_typecode_fp dh_typecode_ptr
82
+ * mmu_watch_or_dirty
131
83
+ * @env: cpu context
132
DEF_HELPER_3(exts32, void, env, fp, s32)
84
+ * @data: lookup parameters
133
DEF_HELPER_3(extf32, void, env, fp, f32)
85
+ * @access_type: load/store/code
134
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
86
+ * @ra: return address into tcg generated code, or 0
135
index XXXXXXX..XXXXXXX 100644
87
+ *
136
--- a/target/ppc/helper.h
88
+ * Trigger watchpoints for @data.addr:@data.size;
137
+++ b/target/ppc/helper.h
89
+ * record writes to protected clean pages.
138
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_1(ftsqrt, TCG_CALL_NO_RWG_SE, i32, i64)
90
+ */
139
91
+static void mmu_watch_or_dirty(CPUArchState *env, MMULookupPageData *data,
140
#define dh_alias_avr ptr
92
+ MMUAccessType access_type, uintptr_t ra)
141
#define dh_ctype_avr ppc_avr_t *
93
+{
142
+#define dh_typecode_avr dh_typecode_ptr
94
+ CPUTLBEntryFull *full = data->full;
143
95
+ target_ulong addr = data->addr;
144
#define dh_alias_vsr ptr
96
+ int flags = data->flags;
145
#define dh_ctype_vsr ppc_vsr_t *
97
+ int size = data->size;
146
+#define dh_typecode_vsr dh_typecode_ptr
98
+
147
99
+ /* On watchpoint hit, this will longjmp out. */
148
DEF_HELPER_3(vavgub, void, avr, avr, avr)
100
+ if (flags & TLB_WATCHPOINT) {
149
DEF_HELPER_3(vavguh, void, avr, avr, avr)
101
+ int wp = access_type == MMU_DATA_STORE ? BP_MEM_WRITE : BP_MEM_READ;
150
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(store_dbatu, void, env, i32, tl)
102
+ cpu_check_watchpoint(env_cpu(env), addr, size, full->attrs, wp, ra);
151
103
+ flags &= ~TLB_WATCHPOINT;
152
#define dh_alias_fprp ptr
104
+ }
153
#define dh_ctype_fprp ppc_fprp_t *
105
+
154
+#define dh_typecode_fprp dh_typecode_ptr
106
+ /* Note that notdirty is only set for writes. */
155
107
+ if (flags & TLB_NOTDIRTY) {
156
DEF_HELPER_4(DADD, void, env, fprp, fprp, fprp)
108
+ notdirty_write(env_cpu(env), addr, size, full, ra);
157
DEF_HELPER_4(DADDQ, void, env, fprp, fprp, fprp)
109
+ flags &= ~TLB_NOTDIRTY;
110
+ }
111
+ data->flags = flags;
112
+}
113
+
114
+/**
115
+ * mmu_lookup: translate page(s)
116
+ * @env: cpu context
117
+ * @addr: virtual address
118
+ * @oi: combined mmu_idx and MemOp
119
+ * @ra: return address into tcg generated code, or 0
120
+ * @access_type: load/store/code
121
+ * @l: output result
122
+ *
123
+ * Resolve the translation for the page(s) beginning at @addr, for MemOp.size
124
+ * bytes. Return true if the lookup crosses a page boundary.
125
+ */
126
+static bool mmu_lookup(CPUArchState *env, target_ulong addr, MemOpIdx oi,
127
+ uintptr_t ra, MMUAccessType type, MMULookupLocals *l)
128
+{
129
+ unsigned a_bits;
130
+ bool crosspage;
131
+ int flags;
132
+
133
+ l->memop = get_memop(oi);
134
+ l->mmu_idx = get_mmuidx(oi);
135
+
136
+ tcg_debug_assert(l->mmu_idx < NB_MMU_MODES);
137
+
138
+ /* Handle CPU specific unaligned behaviour */
139
+ a_bits = get_alignment_bits(l->memop);
140
+ if (addr & ((1 << a_bits) - 1)) {
141
+ cpu_unaligned_access(env_cpu(env), addr, type, l->mmu_idx, ra);
142
+ }
143
+
144
+ l->page[0].addr = addr;
145
+ l->page[0].size = memop_size(l->memop);
146
+ l->page[1].addr = (addr + l->page[0].size - 1) & TARGET_PAGE_MASK;
147
+ l->page[1].size = 0;
148
+ crosspage = (addr ^ l->page[1].addr) & TARGET_PAGE_MASK;
149
+
150
+ if (likely(!crosspage)) {
151
+ mmu_lookup1(env, &l->page[0], l->mmu_idx, type, ra);
152
+
153
+ flags = l->page[0].flags;
154
+ if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) {
155
+ mmu_watch_or_dirty(env, &l->page[0], type, ra);
156
+ }
157
+ if (unlikely(flags & TLB_BSWAP)) {
158
+ l->memop ^= MO_BSWAP;
159
+ }
160
+ } else {
161
+ /* Finish compute of page crossing. */
162
+ int size0 = l->page[1].addr - addr;
163
+ l->page[1].size = l->page[0].size - size0;
164
+ l->page[0].size = size0;
165
+
166
+ /*
167
+ * Lookup both pages, recognizing exceptions from either. If the
168
+ * second lookup potentially resized, refresh first CPUTLBEntryFull.
169
+ */
170
+ mmu_lookup1(env, &l->page[0], l->mmu_idx, type, ra);
171
+ if (mmu_lookup1(env, &l->page[1], l->mmu_idx, type, ra)) {
172
+ uintptr_t index = tlb_index(env, l->mmu_idx, addr);
173
+ l->page[0].full = &env_tlb(env)->d[l->mmu_idx].fulltlb[index];
174
+ }
175
+
176
+ flags = l->page[0].flags | l->page[1].flags;
177
+ if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) {
178
+ mmu_watch_or_dirty(env, &l->page[0], type, ra);
179
+ mmu_watch_or_dirty(env, &l->page[1], type, ra);
180
+ }
181
+
182
+ /*
183
+ * Since target/sparc is the only user of TLB_BSWAP, and all
184
+ * Sparc accesses are aligned, any treatment across two pages
185
+ * would be arbitrary. Refuse it until there's a use.
186
+ */
187
+ tcg_debug_assert((flags & TLB_BSWAP) == 0);
188
+ }
189
+
190
+ return crosspage;
191
+}
192
+
193
/*
194
* Probe for an atomic operation. Do not allow unaligned operations,
195
* or io operations to proceed. Return the host address.
196
@@ -XXX,XX +XXX,XX @@ load_memop(const void *haddr, MemOp op)
197
}
198
}
199
200
-static inline uint64_t QEMU_ALWAYS_INLINE
201
-load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
202
- uintptr_t retaddr, MemOp op, MMUAccessType access_type,
203
- FullLoadHelper *full_load)
204
-{
205
- const unsigned a_bits = get_alignment_bits(get_memop(oi));
206
- const size_t size = memop_size(op);
207
- uintptr_t mmu_idx = get_mmuidx(oi);
208
- uintptr_t index;
209
- CPUTLBEntry *entry;
210
- target_ulong tlb_addr;
211
- void *haddr;
212
- uint64_t res;
213
-
214
- tcg_debug_assert(mmu_idx < NB_MMU_MODES);
215
-
216
- /* Handle CPU specific unaligned behaviour */
217
- if (addr & ((1 << a_bits) - 1)) {
218
- cpu_unaligned_access(env_cpu(env), addr, access_type,
219
- mmu_idx, retaddr);
220
- }
221
-
222
- index = tlb_index(env, mmu_idx, addr);
223
- entry = tlb_entry(env, mmu_idx, addr);
224
- tlb_addr = tlb_read_idx(entry, access_type);
225
-
226
- /* If the TLB entry is for a different page, reload and try again. */
227
- if (!tlb_hit(tlb_addr, addr)) {
228
- if (!victim_tlb_hit(env, mmu_idx, index, access_type,
229
- addr & TARGET_PAGE_MASK)) {
230
- tlb_fill(env_cpu(env), addr, size,
231
- access_type, mmu_idx, retaddr);
232
- index = tlb_index(env, mmu_idx, addr);
233
- entry = tlb_entry(env, mmu_idx, addr);
234
- }
235
- tlb_addr = tlb_read_idx(entry, access_type);
236
- tlb_addr &= ~TLB_INVALID_MASK;
237
- }
238
-
239
- /* Handle anything that isn't just a straight memory access. */
240
- if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
241
- CPUTLBEntryFull *full;
242
- bool need_swap;
243
-
244
- /* For anything that is unaligned, recurse through full_load. */
245
- if ((addr & (size - 1)) != 0) {
246
- goto do_unaligned_access;
247
- }
248
-
249
- full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
250
-
251
- /* Handle watchpoints. */
252
- if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
253
- /* On watchpoint hit, this will longjmp out. */
254
- cpu_check_watchpoint(env_cpu(env), addr, size,
255
- full->attrs, BP_MEM_READ, retaddr);
256
- }
257
-
258
- need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
259
-
260
- /* Handle I/O access. */
261
- if (likely(tlb_addr & TLB_MMIO)) {
262
- return io_readx(env, full, mmu_idx, addr, retaddr,
263
- access_type, op ^ (need_swap * MO_BSWAP));
264
- }
265
-
266
- haddr = (void *)((uintptr_t)addr + entry->addend);
267
-
268
- /*
269
- * Keep these two load_memop separate to ensure that the compiler
270
- * is able to fold the entire function to a single instruction.
271
- * There is a build-time assert inside to remind you of this. ;-)
272
- */
273
- if (unlikely(need_swap)) {
274
- return load_memop(haddr, op ^ MO_BSWAP);
275
- }
276
- return load_memop(haddr, op);
277
- }
278
-
279
- /* Handle slow unaligned access (it spans two pages or IO). */
280
- if (size > 1
281
- && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
282
- >= TARGET_PAGE_SIZE)) {
283
- target_ulong addr1, addr2;
284
- uint64_t r1, r2;
285
- unsigned shift;
286
- do_unaligned_access:
287
- addr1 = addr & ~((target_ulong)size - 1);
288
- addr2 = addr1 + size;
289
- r1 = full_load(env, addr1, oi, retaddr);
290
- r2 = full_load(env, addr2, oi, retaddr);
291
- shift = (addr & (size - 1)) * 8;
292
-
293
- if (memop_big_endian(op)) {
294
- /* Big-endian combine. */
295
- res = (r1 << shift) | (r2 >> ((size * 8) - shift));
296
- } else {
297
- /* Little-endian combine. */
298
- res = (r1 >> shift) | (r2 << ((size * 8) - shift));
299
- }
300
- return res & MAKE_64BIT_MASK(0, size * 8);
301
- }
302
-
303
- haddr = (void *)((uintptr_t)addr + entry->addend);
304
- return load_memop(haddr, op);
305
-}
306
-
307
/*
308
* For the benefit of TCG generated code, we want to avoid the
309
* complication of ABI-specific return type promotion and always
310
@@ -XXX,XX +XXX,XX @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
311
* We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
312
*/
313
314
-static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
315
- MemOpIdx oi, uintptr_t retaddr)
316
+/**
317
+ * do_ld_mmio_beN:
318
+ * @env: cpu context
319
+ * @p: translation parameters
320
+ * @ret_be: accumulated data
321
+ * @mmu_idx: virtual address context
322
+ * @ra: return address into tcg generated code, or 0
323
+ *
324
+ * Load @p->size bytes from @p->addr, which is memory-mapped i/o.
325
+ * The bytes are concatenated in big-endian order with @ret_be.
326
+ */
327
+static uint64_t do_ld_mmio_beN(CPUArchState *env, MMULookupPageData *p,
328
+ uint64_t ret_be, int mmu_idx,
329
+ MMUAccessType type, uintptr_t ra)
330
{
331
- validate_memop(oi, MO_UB);
332
- return load_helper(env, addr, oi, retaddr, MO_UB, MMU_DATA_LOAD,
333
- full_ldub_mmu);
334
+ CPUTLBEntryFull *full = p->full;
335
+ target_ulong addr = p->addr;
336
+ int i, size = p->size;
337
+
338
+ QEMU_IOTHREAD_LOCK_GUARD();
339
+ for (i = 0; i < size; i++) {
340
+ uint8_t x = io_readx(env, full, mmu_idx, addr + i, ra, type, MO_UB);
341
+ ret_be = (ret_be << 8) | x;
342
+ }
343
+ return ret_be;
344
+}
345
+
346
+/**
347
+ * do_ld_bytes_beN
348
+ * @p: translation parameters
349
+ * @ret_be: accumulated data
350
+ *
351
+ * Load @p->size bytes from @p->haddr, which is RAM.
352
+ * The bytes to concatenated in big-endian order with @ret_be.
353
+ */
354
+static uint64_t do_ld_bytes_beN(MMULookupPageData *p, uint64_t ret_be)
355
+{
356
+ uint8_t *haddr = p->haddr;
357
+ int i, size = p->size;
358
+
359
+ for (i = 0; i < size; i++) {
360
+ ret_be = (ret_be << 8) | haddr[i];
361
+ }
362
+ return ret_be;
363
+}
364
+
365
+/*
366
+ * Wrapper for the above.
367
+ */
368
+static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
369
+ uint64_t ret_be, int mmu_idx,
370
+ MMUAccessType type, uintptr_t ra)
371
+{
372
+ if (unlikely(p->flags & TLB_MMIO)) {
373
+ return do_ld_mmio_beN(env, p, ret_be, mmu_idx, type, ra);
374
+ } else {
375
+ return do_ld_bytes_beN(p, ret_be);
376
+ }
377
+}
378
+
379
+static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
380
+ MMUAccessType type, uintptr_t ra)
381
+{
382
+ if (unlikely(p->flags & TLB_MMIO)) {
383
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, MO_UB);
384
+ } else {
385
+ return *(uint8_t *)p->haddr;
386
+ }
387
+}
388
+
389
+static uint16_t do_ld_2(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
390
+ MMUAccessType type, MemOp memop, uintptr_t ra)
391
+{
392
+ uint64_t ret;
393
+
394
+ if (unlikely(p->flags & TLB_MMIO)) {
395
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
396
+ }
397
+
398
+ /* Perform the load host endian, then swap if necessary. */
399
+ ret = load_memop(p->haddr, MO_UW);
400
+ if (memop & MO_BSWAP) {
401
+ ret = bswap16(ret);
402
+ }
403
+ return ret;
404
+}
405
+
406
+static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
407
+ MMUAccessType type, MemOp memop, uintptr_t ra)
408
+{
409
+ uint32_t ret;
410
+
411
+ if (unlikely(p->flags & TLB_MMIO)) {
412
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
413
+ }
414
+
415
+ /* Perform the load host endian. */
416
+ ret = load_memop(p->haddr, MO_UL);
417
+ if (memop & MO_BSWAP) {
418
+ ret = bswap32(ret);
419
+ }
420
+ return ret;
421
+}
422
+
423
+static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
424
+ MMUAccessType type, MemOp memop, uintptr_t ra)
425
+{
426
+ uint64_t ret;
427
+
428
+ if (unlikely(p->flags & TLB_MMIO)) {
429
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
430
+ }
431
+
432
+ /* Perform the load host endian. */
433
+ ret = load_memop(p->haddr, MO_UQ);
434
+ if (memop & MO_BSWAP) {
435
+ ret = bswap64(ret);
436
+ }
437
+ return ret;
438
+}
439
+
440
+static uint8_t do_ld1_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
441
+ uintptr_t ra, MMUAccessType access_type)
442
+{
443
+ MMULookupLocals l;
444
+ bool crosspage;
445
+
446
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
447
+ tcg_debug_assert(!crosspage);
448
+
449
+ return do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
450
}
451
452
tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
453
MemOpIdx oi, uintptr_t retaddr)
454
{
455
- return full_ldub_mmu(env, addr, oi, retaddr);
456
+ validate_memop(oi, MO_UB);
457
+ return do_ld1_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
458
}
459
460
-static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
461
- MemOpIdx oi, uintptr_t retaddr)
462
+static uint16_t do_ld2_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
463
+ uintptr_t ra, MMUAccessType access_type)
464
{
465
- validate_memop(oi, MO_LEUW);
466
- return load_helper(env, addr, oi, retaddr, MO_LEUW, MMU_DATA_LOAD,
467
- full_le_lduw_mmu);
468
+ MMULookupLocals l;
469
+ bool crosspage;
470
+ uint16_t ret;
471
+ uint8_t a, b;
472
+
473
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
474
+ if (likely(!crosspage)) {
475
+ return do_ld_2(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
476
+ }
477
+
478
+ a = do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
479
+ b = do_ld_1(env, &l.page[1], l.mmu_idx, access_type, ra);
480
+
481
+ if ((l.memop & MO_BSWAP) == MO_LE) {
482
+ ret = a | (b << 8);
483
+ } else {
484
+ ret = b | (a << 8);
485
+ }
486
+ return ret;
487
}
488
489
tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
490
MemOpIdx oi, uintptr_t retaddr)
491
{
492
- return full_le_lduw_mmu(env, addr, oi, retaddr);
493
-}
494
-
495
-static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
496
- MemOpIdx oi, uintptr_t retaddr)
497
-{
498
- validate_memop(oi, MO_BEUW);
499
- return load_helper(env, addr, oi, retaddr, MO_BEUW, MMU_DATA_LOAD,
500
- full_be_lduw_mmu);
501
+ validate_memop(oi, MO_LEUW);
502
+ return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
503
}
504
505
tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
506
MemOpIdx oi, uintptr_t retaddr)
507
{
508
- return full_be_lduw_mmu(env, addr, oi, retaddr);
509
+ validate_memop(oi, MO_BEUW);
510
+ return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
511
}
512
513
-static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
514
- MemOpIdx oi, uintptr_t retaddr)
515
+static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
516
+ uintptr_t ra, MMUAccessType access_type)
517
{
518
- validate_memop(oi, MO_LEUL);
519
- return load_helper(env, addr, oi, retaddr, MO_LEUL, MMU_DATA_LOAD,
520
- full_le_ldul_mmu);
521
+ MMULookupLocals l;
522
+ bool crosspage;
523
+ uint32_t ret;
524
+
525
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
526
+ if (likely(!crosspage)) {
527
+ return do_ld_4(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
528
+ }
529
+
530
+ ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
531
+ ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
532
+ if ((l.memop & MO_BSWAP) == MO_LE) {
533
+ ret = bswap32(ret);
534
+ }
535
+ return ret;
536
}
537
538
tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
539
MemOpIdx oi, uintptr_t retaddr)
540
{
541
- return full_le_ldul_mmu(env, addr, oi, retaddr);
542
-}
543
-
544
-static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
545
- MemOpIdx oi, uintptr_t retaddr)
546
-{
547
- validate_memop(oi, MO_BEUL);
548
- return load_helper(env, addr, oi, retaddr, MO_BEUL, MMU_DATA_LOAD,
549
- full_be_ldul_mmu);
550
+ validate_memop(oi, MO_LEUL);
551
+ return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
552
}
553
554
tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
555
MemOpIdx oi, uintptr_t retaddr)
556
{
557
- return full_be_ldul_mmu(env, addr, oi, retaddr);
558
+ validate_memop(oi, MO_BEUL);
559
+ return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
560
+}
561
+
562
+static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
563
+ uintptr_t ra, MMUAccessType access_type)
564
+{
565
+ MMULookupLocals l;
566
+ bool crosspage;
567
+ uint64_t ret;
568
+
569
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
570
+ if (likely(!crosspage)) {
571
+ return do_ld_8(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
572
+ }
573
+
574
+ ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
575
+ ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
576
+ if ((l.memop & MO_BSWAP) == MO_LE) {
577
+ ret = bswap64(ret);
578
+ }
579
+ return ret;
580
}
581
582
uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
583
MemOpIdx oi, uintptr_t retaddr)
584
{
585
validate_memop(oi, MO_LEUQ);
586
- return load_helper(env, addr, oi, retaddr, MO_LEUQ, MMU_DATA_LOAD,
587
- helper_le_ldq_mmu);
588
+ return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
589
}
590
591
uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
592
MemOpIdx oi, uintptr_t retaddr)
593
{
594
validate_memop(oi, MO_BEUQ);
595
- return load_helper(env, addr, oi, retaddr, MO_BEUQ, MMU_DATA_LOAD,
596
- helper_be_ldq_mmu);
597
+ return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
598
}
599
600
/*
601
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
602
* Load helpers for cpu_ldst.h.
603
*/
604
605
-static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
606
- MemOpIdx oi, uintptr_t retaddr,
607
- FullLoadHelper *full_load)
608
+static void plugin_load_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
609
{
610
- uint64_t ret;
611
-
612
- ret = full_load(env, addr, oi, retaddr);
613
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
614
- return ret;
615
}
616
617
uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra)
618
{
619
- return cpu_load_helper(env, addr, oi, ra, full_ldub_mmu);
620
+ uint8_t ret;
621
+
622
+ validate_memop(oi, MO_UB);
623
+ ret = do_ld1_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
624
+ plugin_load_cb(env, addr, oi);
625
+ return ret;
626
}
627
628
uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
629
MemOpIdx oi, uintptr_t ra)
630
{
631
- return cpu_load_helper(env, addr, oi, ra, full_be_lduw_mmu);
632
+ uint16_t ret;
633
+
634
+ validate_memop(oi, MO_BEUW);
635
+ ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
636
+ plugin_load_cb(env, addr, oi);
637
+ return ret;
638
}
639
640
uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
641
MemOpIdx oi, uintptr_t ra)
642
{
643
- return cpu_load_helper(env, addr, oi, ra, full_be_ldul_mmu);
644
+ uint32_t ret;
645
+
646
+ validate_memop(oi, MO_BEUL);
647
+ ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
648
+ plugin_load_cb(env, addr, oi);
649
+ return ret;
650
}
651
652
uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
653
MemOpIdx oi, uintptr_t ra)
654
{
655
- return cpu_load_helper(env, addr, oi, ra, helper_be_ldq_mmu);
656
+ uint64_t ret;
657
+
658
+ validate_memop(oi, MO_BEUQ);
659
+ ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
660
+ plugin_load_cb(env, addr, oi);
661
+ return ret;
662
}
663
664
uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
665
MemOpIdx oi, uintptr_t ra)
666
{
667
- return cpu_load_helper(env, addr, oi, ra, full_le_lduw_mmu);
668
+ uint16_t ret;
669
+
670
+ validate_memop(oi, MO_LEUW);
671
+ ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
672
+ plugin_load_cb(env, addr, oi);
673
+ return ret;
674
}
675
676
uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
677
MemOpIdx oi, uintptr_t ra)
678
{
679
- return cpu_load_helper(env, addr, oi, ra, full_le_ldul_mmu);
680
+ uint32_t ret;
681
+
682
+ validate_memop(oi, MO_LEUL);
683
+ ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
684
+ plugin_load_cb(env, addr, oi);
685
+ return ret;
686
}
687
688
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
689
MemOpIdx oi, uintptr_t ra)
690
{
691
- return cpu_load_helper(env, addr, oi, ra, helper_le_ldq_mmu);
692
+ uint64_t ret;
693
+
694
+ validate_memop(oi, MO_LEUQ);
695
+ ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
696
+ plugin_load_cb(env, addr, oi);
697
+ return ret;
698
}
699
700
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
701
@@ -XXX,XX +XXX,XX @@ void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
702
703
/* Code access functions. */
704
705
-static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
706
- MemOpIdx oi, uintptr_t retaddr)
707
-{
708
- return load_helper(env, addr, oi, retaddr, MO_8,
709
- MMU_INST_FETCH, full_ldub_code);
710
-}
711
-
712
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
713
{
714
MemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
715
- return full_ldub_code(env, addr, oi, 0);
716
-}
717
-
718
-static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
719
- MemOpIdx oi, uintptr_t retaddr)
720
-{
721
- return load_helper(env, addr, oi, retaddr, MO_TEUW,
722
- MMU_INST_FETCH, full_lduw_code);
723
+ return do_ld1_mmu(env, addr, oi, 0, MMU_INST_FETCH);
724
}
725
726
uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
727
{
728
MemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
729
- return full_lduw_code(env, addr, oi, 0);
730
-}
731
-
732
-static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
733
- MemOpIdx oi, uintptr_t retaddr)
734
-{
735
- return load_helper(env, addr, oi, retaddr, MO_TEUL,
736
- MMU_INST_FETCH, full_ldl_code);
737
+ return do_ld2_mmu(env, addr, oi, 0, MMU_INST_FETCH);
738
}
739
740
uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
741
{
742
MemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
743
- return full_ldl_code(env, addr, oi, 0);
744
-}
745
-
746
-static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
747
- MemOpIdx oi, uintptr_t retaddr)
748
-{
749
- return load_helper(env, addr, oi, retaddr, MO_TEUQ,
750
- MMU_INST_FETCH, full_ldq_code);
751
+ return do_ld4_mmu(env, addr, oi, 0, MMU_INST_FETCH);
752
}
753
754
uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
755
{
756
MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true));
757
- return full_ldq_code(env, addr, oi, 0);
758
+ return do_ld8_mmu(env, addr, oi, 0, MMU_INST_FETCH);
759
}
760
761
uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
762
MemOpIdx oi, uintptr_t retaddr)
763
{
764
- return full_ldub_code(env, addr, oi, retaddr);
765
+ return do_ld1_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
766
}
767
768
uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
769
MemOpIdx oi, uintptr_t retaddr)
770
{
771
- MemOp mop = get_memop(oi);
772
- int idx = get_mmuidx(oi);
773
- uint16_t ret;
774
-
775
- ret = full_lduw_code(env, addr, make_memop_idx(MO_TEUW, idx), retaddr);
776
- if ((mop & MO_BSWAP) != MO_TE) {
777
- ret = bswap16(ret);
778
- }
779
- return ret;
780
+ return do_ld2_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
781
}
782
783
uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
784
MemOpIdx oi, uintptr_t retaddr)
785
{
786
- MemOp mop = get_memop(oi);
787
- int idx = get_mmuidx(oi);
788
- uint32_t ret;
789
-
790
- ret = full_ldl_code(env, addr, make_memop_idx(MO_TEUL, idx), retaddr);
791
- if ((mop & MO_BSWAP) != MO_TE) {
792
- ret = bswap32(ret);
793
- }
794
- return ret;
795
+ return do_ld4_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
796
}
797
798
uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
799
MemOpIdx oi, uintptr_t retaddr)
800
{
801
- MemOp mop = get_memop(oi);
802
- int idx = get_mmuidx(oi);
803
- uint64_t ret;
804
-
805
- ret = full_ldq_code(env, addr, make_memop_idx(MO_TEUQ, idx), retaddr);
806
- if ((mop & MO_BSWAP) != MO_TE) {
807
- ret = bswap64(ret);
808
- }
809
- return ret;
810
+ return do_ld8_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
811
}
--
2.34.1

New patch
1
Instead of trying to unify all operations on uint64_t, use
2
mmu_lookup() to perform the basic tlb hit and resolution.
3
Create individual functions to handle access by size.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
accel/tcg/cputlb.c | 408 +++++++++++++++++++++------------------------
9
1 file changed, 193 insertions(+), 215 deletions(-)
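
As a reading aid, the shape of the new store path is condensed below from do_st4_mmu() and its helpers as introduced in this patch; it is an illustration only, not part of the diff. mmu_lookup() resolves the TLB for one page, or for both pages of a crossing access, and a size-specific helper then performs the store, falling back to little-endian byte stores for the crossing case.

    /* Illustration condensed from the patch: the 32-bit store path. */
    static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                           MemOpIdx oi, uintptr_t ra)
    {
        MMULookupLocals l;
        bool crosspage;

        /* One lookup handles tlb hit/fill for every page of the access. */
        crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
        if (likely(!crosspage)) {
            /* Common case: the access lies within a single page. */
            do_st_4(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
            return;
        }

        /* Swap to little endian for simplicity, then store by bytes. */
        if ((l.memop & MO_BSWAP) != MO_LE) {
            val = bswap32(val);
        }
        val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
        (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
    }

The 1-, 2- and 8-byte paths follow the same structure, so the MMIO and page-crossing cases are written once per access size rather than once per endianness.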
10
11
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/cputlb.c
14
+++ b/accel/tcg/cputlb.c
15
@@ -XXX,XX +XXX,XX @@ store_memop(void *haddr, uint64_t val, MemOp op)
16
}
17
}
18
19
-static void full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
20
- MemOpIdx oi, uintptr_t retaddr);
21
-
22
-static void __attribute__((noinline))
23
-store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
24
- uintptr_t retaddr, size_t size, uintptr_t mmu_idx,
25
- bool big_endian)
26
+/**
27
+ * do_st_mmio_leN:
28
+ * @env: cpu context
29
+ * @p: translation parameters
30
+ * @val_le: data to store
31
+ * @mmu_idx: virtual address context
32
+ * @ra: return address into tcg generated code, or 0
33
+ *
34
+ * Store @p->size bytes at @p->addr, which is memory-mapped i/o.
35
+ * The bytes to store are extracted in little-endian order from @val_le;
36
+ * return the bytes of @val_le beyond @p->size that have not been stored.
37
+ */
38
+static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p,
39
+ uint64_t val_le, int mmu_idx, uintptr_t ra)
40
{
41
- uintptr_t index, index2;
42
- CPUTLBEntry *entry, *entry2;
43
- target_ulong page1, page2, tlb_addr, tlb_addr2;
44
- MemOpIdx oi;
45
- size_t size2;
46
- int i;
47
+ CPUTLBEntryFull *full = p->full;
48
+ target_ulong addr = p->addr;
49
+ int i, size = p->size;
50
51
- /*
52
- * Ensure the second page is in the TLB. Note that the first page
53
- * is already guaranteed to be filled, and that the second page
54
- * cannot evict the first. An exception to this rule is PAGE_WRITE_INV
55
- * handling: the first page could have evicted itself.
56
- */
57
- page1 = addr & TARGET_PAGE_MASK;
58
- page2 = (addr + size) & TARGET_PAGE_MASK;
59
- size2 = (addr + size) & ~TARGET_PAGE_MASK;
60
- index2 = tlb_index(env, mmu_idx, page2);
61
- entry2 = tlb_entry(env, mmu_idx, page2);
62
-
63
- tlb_addr2 = tlb_addr_write(entry2);
64
- if (page1 != page2 && !tlb_hit_page(tlb_addr2, page2)) {
65
- if (!victim_tlb_hit(env, mmu_idx, index2, MMU_DATA_STORE, page2)) {
66
- tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
67
- mmu_idx, retaddr);
68
- index2 = tlb_index(env, mmu_idx, page2);
69
- entry2 = tlb_entry(env, mmu_idx, page2);
70
- }
71
- tlb_addr2 = tlb_addr_write(entry2);
72
+ QEMU_IOTHREAD_LOCK_GUARD();
73
+ for (i = 0; i < size; i++, val_le >>= 8) {
74
+ io_writex(env, full, mmu_idx, val_le, addr + i, ra, MO_UB);
75
}
76
+ return val_le;
77
+}
78
79
- index = tlb_index(env, mmu_idx, addr);
80
- entry = tlb_entry(env, mmu_idx, addr);
81
- tlb_addr = tlb_addr_write(entry);
82
+/**
83
+ * do_st_bytes_leN:
84
+ * @p: translation parameters
85
+ * @val_le: data to store
86
+ *
87
+ * Store @p->size bytes at @p->haddr, which is RAM.
88
+ * The bytes to store are extracted in little-endian order from @val_le;
89
+ * return the bytes of @val_le beyond @p->size that have not been stored.
90
+ */
91
+static uint64_t do_st_bytes_leN(MMULookupPageData *p, uint64_t val_le)
92
+{
93
+ uint8_t *haddr = p->haddr;
94
+ int i, size = p->size;
95
96
- /*
97
- * Handle watchpoints. Since this may trap, all checks
98
- * must happen before any store.
99
- */
100
- if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
101
- cpu_check_watchpoint(env_cpu(env), addr, size - size2,
102
- env_tlb(env)->d[mmu_idx].fulltlb[index].attrs,
103
- BP_MEM_WRITE, retaddr);
104
- }
105
- if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
106
- cpu_check_watchpoint(env_cpu(env), page2, size2,
107
- env_tlb(env)->d[mmu_idx].fulltlb[index2].attrs,
108
- BP_MEM_WRITE, retaddr);
109
+ for (i = 0; i < size; i++, val_le >>= 8) {
110
+ haddr[i] = val_le;
111
}
112
+ return val_le;
113
+}
114
115
- /*
116
- * XXX: not efficient, but simple.
117
- * This loop must go in the forward direction to avoid issues
118
- * with self-modifying code in Windows 64-bit.
119
- */
120
- oi = make_memop_idx(MO_UB, mmu_idx);
121
- if (big_endian) {
122
- for (i = 0; i < size; ++i) {
123
- /* Big-endian extract. */
124
- uint8_t val8 = val >> (((size - 1) * 8) - (i * 8));
125
- full_stb_mmu(env, addr + i, val8, oi, retaddr);
126
- }
127
+/*
128
+ * Wrapper for the above.
129
+ */
130
+static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
131
+ uint64_t val_le, int mmu_idx, uintptr_t ra)
132
+{
133
+ if (unlikely(p->flags & TLB_MMIO)) {
134
+ return do_st_mmio_leN(env, p, val_le, mmu_idx, ra);
135
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
136
+ return val_le >> (p->size * 8);
137
} else {
138
- for (i = 0; i < size; ++i) {
139
- /* Little-endian extract. */
140
- uint8_t val8 = val >> (i * 8);
141
- full_stb_mmu(env, addr + i, val8, oi, retaddr);
142
- }
143
+ return do_st_bytes_leN(p, val_le);
144
}
145
}
146
147
-static inline void QEMU_ALWAYS_INLINE
148
-store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
149
- MemOpIdx oi, uintptr_t retaddr, MemOp op)
150
+static void do_st_1(CPUArchState *env, MMULookupPageData *p, uint8_t val,
151
+ int mmu_idx, uintptr_t ra)
152
{
153
- const unsigned a_bits = get_alignment_bits(get_memop(oi));
154
- const size_t size = memop_size(op);
155
- uintptr_t mmu_idx = get_mmuidx(oi);
156
- uintptr_t index;
157
- CPUTLBEntry *entry;
158
- target_ulong tlb_addr;
159
- void *haddr;
160
-
161
- tcg_debug_assert(mmu_idx < NB_MMU_MODES);
162
-
163
- /* Handle CPU specific unaligned behaviour */
164
- if (addr & ((1 << a_bits) - 1)) {
165
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
166
- mmu_idx, retaddr);
167
+ if (unlikely(p->flags & TLB_MMIO)) {
168
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, MO_UB);
169
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
170
+ /* nothing */
171
+ } else {
172
+ *(uint8_t *)p->haddr = val;
173
}
174
-
175
- index = tlb_index(env, mmu_idx, addr);
176
- entry = tlb_entry(env, mmu_idx, addr);
177
- tlb_addr = tlb_addr_write(entry);
178
-
179
- /* If the TLB entry is for a different page, reload and try again. */
180
- if (!tlb_hit(tlb_addr, addr)) {
181
- if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE,
182
- addr & TARGET_PAGE_MASK)) {
183
- tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
184
- mmu_idx, retaddr);
185
- index = tlb_index(env, mmu_idx, addr);
186
- entry = tlb_entry(env, mmu_idx, addr);
187
- }
188
- tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
189
- }
190
-
191
- /* Handle anything that isn't just a straight memory access. */
192
- if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
193
- CPUTLBEntryFull *full;
194
- bool need_swap;
195
-
196
- /* For anything that is unaligned, recurse through byte stores. */
197
- if ((addr & (size - 1)) != 0) {
198
- goto do_unaligned_access;
199
- }
200
-
201
- full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
202
-
203
- /* Handle watchpoints. */
204
- if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
205
- /* On watchpoint hit, this will longjmp out. */
206
- cpu_check_watchpoint(env_cpu(env), addr, size,
207
- full->attrs, BP_MEM_WRITE, retaddr);
208
- }
209
-
210
- need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
211
-
212
- /* Handle I/O access. */
213
- if (tlb_addr & TLB_MMIO) {
214
- io_writex(env, full, mmu_idx, val, addr, retaddr,
215
- op ^ (need_swap * MO_BSWAP));
216
- return;
217
- }
218
-
219
- /* Ignore writes to ROM. */
220
- if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
221
- return;
222
- }
223
-
224
- /* Handle clean RAM pages. */
225
- if (tlb_addr & TLB_NOTDIRTY) {
226
- notdirty_write(env_cpu(env), addr, size, full, retaddr);
227
- }
228
-
229
- haddr = (void *)((uintptr_t)addr + entry->addend);
230
-
231
- /*
232
- * Keep these two store_memop separate to ensure that the compiler
233
- * is able to fold the entire function to a single instruction.
234
- * There is a build-time assert inside to remind you of this. ;-)
235
- */
236
- if (unlikely(need_swap)) {
237
- store_memop(haddr, val, op ^ MO_BSWAP);
238
- } else {
239
- store_memop(haddr, val, op);
240
- }
241
- return;
242
- }
243
-
244
- /* Handle slow unaligned access (it spans two pages or IO). */
245
- if (size > 1
246
- && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
247
- >= TARGET_PAGE_SIZE)) {
248
- do_unaligned_access:
249
- store_helper_unaligned(env, addr, val, retaddr, size,
250
- mmu_idx, memop_big_endian(op));
251
- return;
252
- }
253
-
254
- haddr = (void *)((uintptr_t)addr + entry->addend);
255
- store_memop(haddr, val, op);
256
}
257
258
-static void __attribute__((noinline))
259
-full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
260
- MemOpIdx oi, uintptr_t retaddr)
261
+static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val,
262
+ int mmu_idx, MemOp memop, uintptr_t ra)
263
{
264
- validate_memop(oi, MO_UB);
265
- store_helper(env, addr, val, oi, retaddr, MO_UB);
266
+ if (unlikely(p->flags & TLB_MMIO)) {
267
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
268
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
269
+ /* nothing */
270
+ } else {
271
+ /* Swap to host endian if necessary, then store. */
272
+ if (memop & MO_BSWAP) {
273
+ val = bswap16(val);
274
+ }
275
+ store_memop(p->haddr, val, MO_UW);
276
+ }
277
+}
278
+
279
+static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val,
280
+ int mmu_idx, MemOp memop, uintptr_t ra)
281
+{
282
+ if (unlikely(p->flags & TLB_MMIO)) {
283
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
284
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
285
+ /* nothing */
286
+ } else {
287
+ /* Swap to host endian if necessary, then store. */
288
+ if (memop & MO_BSWAP) {
289
+ val = bswap32(val);
290
+ }
291
+ store_memop(p->haddr, val, MO_UL);
292
+ }
293
+}
294
+
295
+static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
296
+ int mmu_idx, MemOp memop, uintptr_t ra)
297
+{
298
+ if (unlikely(p->flags & TLB_MMIO)) {
299
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
300
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
301
+ /* nothing */
302
+ } else {
303
+ /* Swap to host endian if necessary, then store. */
304
+ if (memop & MO_BSWAP) {
305
+ val = bswap64(val);
306
+ }
307
+ store_memop(p->haddr, val, MO_UQ);
308
+ }
309
}
310
311
void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
312
- MemOpIdx oi, uintptr_t retaddr)
313
+ MemOpIdx oi, uintptr_t ra)
314
{
315
- full_stb_mmu(env, addr, val, oi, retaddr);
316
+ MMULookupLocals l;
317
+ bool crosspage;
318
+
319
+ validate_memop(oi, MO_UB);
320
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
321
+ tcg_debug_assert(!crosspage);
322
+
323
+ do_st_1(env, &l.page[0], val, l.mmu_idx, ra);
324
}
325
326
-static void full_le_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
327
- MemOpIdx oi, uintptr_t retaddr)
328
+static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
329
+ MemOpIdx oi, uintptr_t ra)
330
{
331
- validate_memop(oi, MO_LEUW);
332
- store_helper(env, addr, val, oi, retaddr, MO_LEUW);
333
+ MMULookupLocals l;
334
+ bool crosspage;
335
+ uint8_t a, b;
336
+
337
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
338
+ if (likely(!crosspage)) {
339
+ do_st_2(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
340
+ return;
341
+ }
342
+
343
+ if ((l.memop & MO_BSWAP) == MO_LE) {
344
+ a = val, b = val >> 8;
345
+ } else {
346
+ b = val, a = val >> 8;
347
+ }
348
+ do_st_1(env, &l.page[0], a, l.mmu_idx, ra);
349
+ do_st_1(env, &l.page[1], b, l.mmu_idx, ra);
350
}
351
352
void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
353
MemOpIdx oi, uintptr_t retaddr)
354
{
355
- full_le_stw_mmu(env, addr, val, oi, retaddr);
356
-}
357
-
358
-static void full_be_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
359
- MemOpIdx oi, uintptr_t retaddr)
360
-{
361
- validate_memop(oi, MO_BEUW);
362
- store_helper(env, addr, val, oi, retaddr, MO_BEUW);
363
+ validate_memop(oi, MO_LEUW);
364
+ do_st2_mmu(env, addr, val, oi, retaddr);
365
}
366
367
void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
368
MemOpIdx oi, uintptr_t retaddr)
369
{
370
- full_be_stw_mmu(env, addr, val, oi, retaddr);
371
+ validate_memop(oi, MO_BEUW);
372
+ do_st2_mmu(env, addr, val, oi, retaddr);
373
}
374
375
-static void full_le_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
376
- MemOpIdx oi, uintptr_t retaddr)
377
+static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
378
+ MemOpIdx oi, uintptr_t ra)
379
{
380
- validate_memop(oi, MO_LEUL);
381
- store_helper(env, addr, val, oi, retaddr, MO_LEUL);
382
+ MMULookupLocals l;
383
+ bool crosspage;
384
+
385
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
386
+ if (likely(!crosspage)) {
387
+ do_st_4(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
388
+ return;
389
+ }
390
+
391
+ /* Swap to little endian for simplicity, then store by bytes. */
392
+ if ((l.memop & MO_BSWAP) != MO_LE) {
393
+ val = bswap32(val);
394
+ }
395
+ val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
396
+ (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
397
}
398
399
void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
400
MemOpIdx oi, uintptr_t retaddr)
401
{
402
- full_le_stl_mmu(env, addr, val, oi, retaddr);
403
-}
404
-
405
-static void full_be_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
406
- MemOpIdx oi, uintptr_t retaddr)
407
-{
408
- validate_memop(oi, MO_BEUL);
409
- store_helper(env, addr, val, oi, retaddr, MO_BEUL);
410
+ validate_memop(oi, MO_LEUL);
411
+ do_st4_mmu(env, addr, val, oi, retaddr);
412
}
413
414
void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
415
MemOpIdx oi, uintptr_t retaddr)
416
{
417
- full_be_stl_mmu(env, addr, val, oi, retaddr);
418
+ validate_memop(oi, MO_BEUL);
419
+ do_st4_mmu(env, addr, val, oi, retaddr);
420
+}
421
+
422
+static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
423
+ MemOpIdx oi, uintptr_t ra)
424
+{
425
+ MMULookupLocals l;
426
+ bool crosspage;
427
+
428
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
429
+ if (likely(!crosspage)) {
430
+ do_st_8(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
431
+ return;
432
+ }
433
+
434
+ /* Swap to little endian for simplicity, then store by bytes. */
435
+ if ((l.memop & MO_BSWAP) != MO_LE) {
436
+ val = bswap64(val);
437
+ }
438
+ val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
439
+ (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
440
}
441
442
void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
443
MemOpIdx oi, uintptr_t retaddr)
444
{
445
validate_memop(oi, MO_LEUQ);
446
- store_helper(env, addr, val, oi, retaddr, MO_LEUQ);
447
+ do_st8_mmu(env, addr, val, oi, retaddr);
448
}
449
450
void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
451
MemOpIdx oi, uintptr_t retaddr)
452
{
453
validate_memop(oi, MO_BEUQ);
454
- store_helper(env, addr, val, oi, retaddr, MO_BEUQ);
455
+ do_st8_mmu(env, addr, val, oi, retaddr);
456
}
457
458
/*
459
* Store Helpers for cpu_ldst.h
460
*/
461
462
-typedef void FullStoreHelper(CPUArchState *env, target_ulong addr,
463
- uint64_t val, MemOpIdx oi, uintptr_t retaddr);
464
-
465
-static inline void cpu_store_helper(CPUArchState *env, target_ulong addr,
466
- uint64_t val, MemOpIdx oi, uintptr_t ra,
467
- FullStoreHelper *full_store)
468
+static void plugin_store_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
469
{
470
- full_store(env, addr, val, oi, ra);
471
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
472
}
473
474
void cpu_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
475
MemOpIdx oi, uintptr_t retaddr)
476
{
477
- cpu_store_helper(env, addr, val, oi, retaddr, full_stb_mmu);
478
+ helper_ret_stb_mmu(env, addr, val, oi, retaddr);
479
+ plugin_store_cb(env, addr, oi);
480
}
481
482
void cpu_stw_be_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
483
MemOpIdx oi, uintptr_t retaddr)
484
{
485
- cpu_store_helper(env, addr, val, oi, retaddr, full_be_stw_mmu);
486
+ helper_be_stw_mmu(env, addr, val, oi, retaddr);
487
+ plugin_store_cb(env, addr, oi);
488
}
489
490
void cpu_stl_be_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
491
MemOpIdx oi, uintptr_t retaddr)
492
{
493
- cpu_store_helper(env, addr, val, oi, retaddr, full_be_stl_mmu);
494
+ helper_be_stl_mmu(env, addr, val, oi, retaddr);
495
+ plugin_store_cb(env, addr, oi);
496
}
497
498
void cpu_stq_be_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
499
MemOpIdx oi, uintptr_t retaddr)
500
{
501
- cpu_store_helper(env, addr, val, oi, retaddr, helper_be_stq_mmu);
502
+ helper_be_stq_mmu(env, addr, val, oi, retaddr);
503
+ plugin_store_cb(env, addr, oi);
504
}
505
506
void cpu_stw_le_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
507
MemOpIdx oi, uintptr_t retaddr)
508
{
509
- cpu_store_helper(env, addr, val, oi, retaddr, full_le_stw_mmu);
510
+ helper_le_stw_mmu(env, addr, val, oi, retaddr);
511
+ plugin_store_cb(env, addr, oi);
512
}
513
514
void cpu_stl_le_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
515
MemOpIdx oi, uintptr_t retaddr)
516
{
517
- cpu_store_helper(env, addr, val, oi, retaddr, full_le_stl_mmu);
518
+ helper_le_stl_mmu(env, addr, val, oi, retaddr);
519
+ plugin_store_cb(env, addr, oi);
520
}
521
522
void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
523
MemOpIdx oi, uintptr_t retaddr)
524
{
525
- cpu_store_helper(env, addr, val, oi, retaddr, helper_le_stq_mmu);
526
+ helper_le_stq_mmu(env, addr, val, oi, retaddr);
527
+ plugin_store_cb(env, addr, oi);
528
}
529
530
void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
531
--
532
2.34.1
From: Luc Michel <lmichel@kalray.eu>

In some cases, cpu->exit_request can be false after handling the
interrupt, leading to another TB being executed instead of returning
to the main loop.

Fix this by returning true unconditionally when in single-step mode.

Fixes: ba3c35d9c402 ("tcg/cpu-exec: precise single-stepping after an interrupt")
Signed-off-by: Luc Michel <lmichel@kalray.eu>
Message-Id: <20220214132656.11397-1-lmichel@kalray.eu>
[rth: Unlock iothread mutex; simplify indentation]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/cpu-exec.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -XXX,XX +XXX,XX @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
* raised when single-stepping so that GDB doesn't miss the
* next instruction.
*/
- cpu->exception_index =
- (cpu->singlestep_enabled ? EXCP_DEBUG : -1);
+ if (unlikely(cpu->singlestep_enabled)) {
+ cpu->exception_index = EXCP_DEBUG;
+ qemu_mutex_unlock_iothread();
+ return true;
+ }
+ cpu->exception_index = -1;
*last_tb = NULL;
}
/* The target hook may have updated the 'cpu->interrupt_request';
--
2.25.1

This header is supposed to be private to tcg and in fact
does not need to be included here at all.

Reviewed-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/loongarch/csr_helper.c | 1 -
target/loongarch/iocsr_helper.c | 1 -
2 files changed, 2 deletions(-)

diff --git a/target/loongarch/csr_helper.c b/target/loongarch/csr_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/csr_helper.c
+++ b/target/loongarch/csr_helper.c
@@ -XXX,XX +XXX,XX @@
#include "exec/cpu_ldst.h"
#include "hw/irq.h"
#include "cpu-csr.h"
-#include "tcg/tcg-ldst.h"

target_ulong helper_csrrd_pgd(CPULoongArchState *env)
{
diff --git a/target/loongarch/iocsr_helper.c b/target/loongarch/iocsr_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/iocsr_helper.c
+++ b/target/loongarch/iocsr_helper.c
@@ -XXX,XX +XXX,XX @@
#include "exec/helper-proto.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
-#include "tcg/tcg-ldst.h"

#define GET_MEMTXATTRS(cas) \
((MemTxAttrs){.requester_id = env_cpu(cas)->cpu_index})
--
2.34.1
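
In condensed form, the cpu_handle_interrupt() change in the cpu-exec.c patch above amounts to the following control flow (context trimmed; shown only as an illustration):

    if (unlikely(cpu->singlestep_enabled)) {
        /* Report the debug exception and return to the main loop at once. */
        cpu->exception_index = EXCP_DEBUG;
        qemu_mutex_unlock_iothread();
        return true;
    }
    /* Not single-stepping: no exception to report; reset *last_tb as before. */
    cpu->exception_index = -1;
    *last_tb = NULL;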