v3: Rebase and add a few more patches.

r~

The following changes since commit 384dbdda94c0bba55bf186cccd3714bbb9b737e9:

  Merge tag 'migration-20231020-pull-request' of https://gitlab.com/juan.quintela/qemu into staging (2023-10-20 06:46:53 -0700)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20231023

for you to fetch changes up to e40df3522b384d3b7dd38187d735bd6228b20e47:

  target/xtensa: Use tcg_gen_sextract_i32 (2023-10-22 16:44:49 -0700)

----------------------------------------------------------------
tcg: Drop unused tcg_temp_free define
tcg: Introduce tcg_use_softmmu
tcg: Optimize past conditional branches
tcg: Use constant zero when expanding with divu2
tcg: Add negsetcondi
tcg: Define MO_TL
tcg: Export tcg_gen_ext_{i32,i64,tl}
target/*: Use tcg_gen_ext_*
tcg/ppc: Enable direct branching tcg_out_goto_tb with TCG_REG_TB
tcg/ppc: Use ADDPCIS for power9
tcg/ppc: Use prefixed instructions for power10
tcg/ppc: Disable TCG_REG_TB for Power9/Power10

----------------------------------------------------------------
Jordan Niethe (1):
      tcg/ppc: Enable direct branching tcg_out_goto_tb with TCG_REG_TB

Mike Frysinger (1):
      tcg: drop unused tcg_temp_free define

Paolo Bonzini (2):
      tcg: add negsetcondi
      tcg: Define MO_TL

Richard Henderson (34):
      tcg/ppc: Untabify tcg-target.c.inc
      tcg/ppc: Reinterpret tb-relative to TB+4
      tcg/ppc: Use ADDPCIS in tcg_out_tb_start
      tcg/ppc: Use ADDPCIS in tcg_out_movi_int
      tcg/ppc: Use ADDPCIS for the constant pool
      tcg/ppc: Use ADDPCIS in tcg_out_goto_tb
      tcg/ppc: Use PADDI in tcg_out_movi
      tcg/ppc: Use prefixed instructions in tcg_out_mem_long
      tcg/ppc: Use PLD in tcg_out_movi for constant pool
      tcg/ppc: Use prefixed instructions in tcg_out_dupi_vec
      tcg/ppc: Use PLD in tcg_out_goto_tb
      tcg/ppc: Disable TCG_REG_TB for Power9/Power10
      tcg: Introduce tcg_use_softmmu
      tcg: Provide guest_base fallback for system mode
      tcg/arm: Use tcg_use_softmmu
      tcg/aarch64: Use tcg_use_softmmu
      tcg/i386: Use tcg_use_softmmu
      tcg/loongarch64: Use tcg_use_softmmu
      tcg/mips: Use tcg_use_softmmu
      tcg/ppc: Use tcg_use_softmmu
      tcg/riscv: Do not reserve TCG_GUEST_BASE_REG for guest_base zero
      tcg/riscv: Use tcg_use_softmmu
      tcg/s390x: Use tcg_use_softmmu
      tcg: Use constant zero when expanding with divu2
      tcg: Optimize past conditional branches
      tcg: Add tcg_gen_{ld,st}_i128
      target/i386: Use i128 for 128 and 256-bit loads and stores
      tcg: Export tcg_gen_ext_{i32,i64,tl}
      target/arm: Use tcg_gen_ext_i64
      target/i386: Use tcg_gen_ext_tl
      target/m68k: Use tcg_gen_ext_i32
      target/rx: Use tcg_gen_ext_i32
      target/tricore: Use tcg_gen_*extract_tl
      target/xtensa: Use tcg_gen_sextract_i32

 include/exec/target_long.h | 2 +
 include/tcg/tcg-op-common.h | 9 +
 include/tcg/tcg-op.h | 6 +-
 include/tcg/tcg.h | 8 +-
 target/arm/tcg/translate-a64.c | 37 +--
 target/i386/tcg/translate.c | 91 +++----
 target/m68k/translate.c | 23 +-
 target/rx/translate.c | 11 +-
 target/tricore/translate.c | 20 +-
 target/xtensa/translate.c | 12 +-
 tcg/optimize.c | 8 +-
 tcg/tcg-op-ldst.c | 28 +-
 tcg/tcg-op.c | 50 +++-
 tcg/tcg.c | 13 +-
 tcg/aarch64/tcg-target.c.inc | 177 ++++++------
 tcg/arm/tcg-target.c.inc | 203 +++++++-------
 tcg/i386/tcg-target.c.inc | 198 +++++++-------
 tcg/loongarch64/tcg-target.c.inc | 126 +++++----
 tcg/mips/tcg-target.c.inc | 231 ++++++++--------
 tcg/ppc/tcg-target.c.inc | 561 ++++++++++++++++++++++++++-------------
 tcg/riscv/tcg-target.c.inc | 189 ++++++-------
 tcg/s390x/tcg-target.c.inc | 161 ++++++-----
 22 files changed, 1152 insertions(+), 1012 deletions(-)
New patch
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.c.inc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
10
@@ -XXX,XX +XXX,XX @@ static inline bool in_range_b(tcg_target_long target)
11
}
12
13
static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
14
-             const tcg_insn_unit *target)
15
+ const tcg_insn_unit *target)
16
{
17
ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
18
tcg_debug_assert(in_range_b(disp));
19
@@ -XXX,XX +XXX,XX @@ static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
20
}
21
22
static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
23
-             const tcg_insn_unit *target)
24
+ const tcg_insn_unit *target)
25
{
26
ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
27
tcg_debug_assert(disp == (int16_t) disp);
28
@@ -XXX,XX +XXX,XX @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
29
tcgv_vec_arg(t1), tcgv_vec_arg(t2));
30
vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
31
tcgv_vec_arg(v0), tcgv_vec_arg(t1));
32
-    break;
33
+ break;
34
35
case MO_32:
36
tcg_debug_assert(!have_isa_2_07);
37
--
38
2.34.1
New patch
From: Jordan Niethe <jniethe5@gmail.com>

Direct branch patching was disabled when using TCG_REG_TB in commit
736a1588c1 ("tcg/ppc: Fix race in goto_tb implementation").

The issue with direct branch patching with TCG_REG_TB is the lack of
synchronization between the new TCG_REG_TB being established and the
direct branch being patched in.

If each translation block is responsible for establishing its own
TCG_REG_TB then there can be no synchronization issue.

Make each translation block begin by setting up its own TCG_REG_TB.
Use the preferred 'bcl 20,31,$+4' sequence.
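
For illustration, the per-TB entry sequence that the tcg_out_tb_start
hunk below emits is roughly (a sketch, using the macros already defined
in tcg/ppc/tcg-target.c.inc):

    static void tcg_out_tb_start(TCGContext *s)
    {
        if (USE_REG_TB) {
            /* bcl 20,31,$+4: always-taken branch-and-link to the next
             * instruction, so LR ends up holding that insn's address. */
            tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
            /* mflr TCG_REG_TB: copy that address into the TB register. */
            tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
            /* Step back 4 bytes so TCG_REG_TB points at the TB start. */
            tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, -4));
        }
    }

Because every block recomputes TCG_REG_TB from its own entry point, a
patched direct branch can never be observed with a stale TB register.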

Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
[rth: Split out tcg_out_tb_start, power9 addpcis]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.c.inc | 48 ++++++++++++++--------------------------
 1 file changed, 17 insertions(+), 31 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
27
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
28
29
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
30
tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
31
- if (USE_REG_TB) {
32
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
33
- }
34
tcg_out32(s, BCCTR | BO_ALWAYS);
35
36
/* Epilogue */
37
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
38
39
static void tcg_out_tb_start(TCGContext *s)
40
{
41
- /* nothing to do */
42
+ /* Load TCG_REG_TB. */
43
+ if (USE_REG_TB) {
44
+ /* bcl 20,31,$+4 (preferred form for getting nia) */
45
+ tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
46
+ tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
47
+ tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, -4));
48
+ }
49
}
50
51
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
52
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_tb(TCGContext *s, int which)
53
{
54
uintptr_t ptr = get_jmp_target_addr(s, which);
55
56
+ /* Direct branch will be patched by tb_target_set_jmp_target. */
57
+ set_jmp_insn_offset(s, which);
58
+ tcg_out32(s, NOP);
59
+
60
+ /* When branch is out of range, fall through to indirect. */
61
if (USE_REG_TB) {
62
ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
63
- tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset);
64
-
65
- /* TODO: Use direct branches when possible. */
66
- set_jmp_insn_offset(s, which);
67
- tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
68
-
69
- tcg_out32(s, BCCTR | BO_ALWAYS);
70
-
71
- /* For the unlinked case, need to reset TCG_REG_TB. */
72
- set_jmp_reset_offset(s, which);
73
- tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
74
- -tcg_current_code_size(s));
75
+ tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
76
} else {
77
- /* Direct branch will be patched by tb_target_set_jmp_target. */
78
- set_jmp_insn_offset(s, which);
79
- tcg_out32(s, NOP);
80
-
81
- /* When branch is out of range, fall through to indirect. */
82
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr);
83
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr);
84
- tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
85
- tcg_out32(s, BCCTR | BO_ALWAYS);
86
- set_jmp_reset_offset(s, which);
87
}
88
+
89
+ tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
90
+ tcg_out32(s, BCCTR | BO_ALWAYS);
91
+ set_jmp_reset_offset(s, which);
92
}
93
94
void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
95
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
96
intptr_t diff = addr - jmp_rx;
97
tcg_insn_unit insn;
98
99
- if (USE_REG_TB) {
100
- return;
101
- }
102
-
103
if (in_range_b(diff)) {
104
insn = B | (diff & 0x3fffffc);
105
} else {
106
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
107
switch (opc) {
108
case INDEX_op_goto_ptr:
109
tcg_out32(s, MTSPR | RS(args[0]) | CTR);
110
- if (USE_REG_TB) {
111
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
112
- }
113
tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
114
tcg_out32(s, BCCTR | BO_ALWAYS);
115
break;
116
--
117
2.34.1
New patch
It saves one insn to load the address of TB+4 instead of TB.
Adjust all of the indexing to match.
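
As a sketch of the change: the bcl/mflr entry sequence naturally leaves
the address of TB+4 in the register, so all TB-relative displacements
now go through a small helper that applies the -4 bias instead of
correcting the register itself:

    /* For PPC, we use TB+4 instead of TB as the base. */
    static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target)
    {
        return tcg_tbrel_diff(s, target) - 4;
    }

This is what allows tcg_out_tb_start to drop the trailing
'addi rTB,rTB,-4'.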

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.c.inc | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_callee_save_regs[] = {
14
TCG_REG_R31
15
};
16
17
+/* For PPC, we use TB+4 instead of TB as the base. */
18
+static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target)
19
+{
20
+ return tcg_tbrel_diff(s, target) - 4;
21
+}
22
+
23
static inline bool in_range_b(tcg_target_long target)
24
{
25
return target == sextract64(target, 0, 26);
26
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
27
}
28
29
/* Load addresses within the TB with one insn. */
30
- tb_diff = tcg_tbrel_diff(s, (void *)arg);
31
+ tb_diff = ppc_tbrel_diff(s, (void *)arg);
32
if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
33
tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
34
return;
35
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
36
/* Use the constant pool, if possible. */
37
if (!in_prologue && USE_REG_TB) {
38
new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
39
- tcg_tbrel_diff(s, NULL));
40
+ ppc_tbrel_diff(s, NULL));
41
tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
42
return;
43
}
44
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
45
*/
46
if (USE_REG_TB) {
47
rel = R_PPC_ADDR16;
48
- add = tcg_tbrel_diff(s, NULL);
49
+ add = ppc_tbrel_diff(s, NULL);
50
} else {
51
rel = R_PPC_ADDR32;
52
add = 0;
53
@@ -XXX,XX +XXX,XX @@ static void tcg_out_tb_start(TCGContext *s)
54
/* bcl 20,31,$+4 (preferred form for getting nia) */
55
tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
56
tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
57
- tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, -4));
58
}
59
}
60
61
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_tb(TCGContext *s, int which)
62
63
/* When branch is out of range, fall through to indirect. */
64
if (USE_REG_TB) {
65
- ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
66
+ ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr);
67
tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
68
} else {
69
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr);
70
--
71
2.34.1
New patch
With ISA v3.0, we can use ADDPCIS instead of BCL+MFLR to load NIA.
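
ADDPCIS adds a sign-extended, 16-bit-shifted immediate to the next
instruction address (NIA); the 16-bit field is scattered across three
sub-fields of the instruction word. A commented sketch of the new
helper added below:

    static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm)
    {
        uint32_t d0, d1, d2;

        /* Only 16-bit-shifted, 32-bit-signed displacements are encodable. */
        tcg_debug_assert((imm & 0xffff) == 0);
        tcg_debug_assert(imm == (int32_t)imm);

        d2 = extract32(imm, 16, 1);    /* low bit of the immediate field */
        d1 = extract32(imm, 17, 5);    /* middle 5 bits */
        d0 = extract32(imm, 22, 10);   /* high 10 bits */
        tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2);
    }

With imm == 0 this is the 'lnia' extended mnemonic, loading NIA in a
single insn in place of the bcl+mflr pair.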

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.c.inc | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
12
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
13
#define CRNAND XO19(225)
14
#define CROR XO19(449)
15
#define CRNOR XO19( 33)
16
+#define ADDPCIS XO19( 2)
17
18
#define EXTSB XO31(954)
19
#define EXTSH XO31(922)
20
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
21
tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
22
}
23
24
+static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm)
25
+{
26
+ uint32_t d0, d1, d2;
27
+
28
+ tcg_debug_assert((imm & 0xffff) == 0);
29
+ tcg_debug_assert(imm == (int32_t)imm);
30
+
31
+ d2 = extract32(imm, 16, 1);
32
+ d1 = extract32(imm, 17, 5);
33
+ d0 = extract32(imm, 22, 10);
34
+ tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2);
35
+}
36
+
37
static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
38
{
39
TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
40
@@ -XXX,XX +XXX,XX @@ static void tcg_out_tb_start(TCGContext *s)
41
{
42
/* Load TCG_REG_TB. */
43
if (USE_REG_TB) {
44
- /* bcl 20,31,$+4 (preferred form for getting nia) */
45
- tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
46
- tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
47
+ if (have_isa_3_00) {
48
+ /* lnia REG_TB */
49
+ tcg_out_addpcis(s, TCG_REG_TB, 0);
50
+ } else {
51
+ /* bcl 20,31,$+4 (preferred form for getting nia) */
52
+ tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
53
+ tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
54
+ }
55
}
56
}
57
58
--
59
2.34.1
New patch
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.c.inc | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
10
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
11
return;
12
}
13
14
+ /* Load addresses within 2GB with 2 insns. */
15
+ if (have_isa_3_00) {
16
+ intptr_t hi = tcg_pcrel_diff(s, (void *)arg) - 4;
17
+ int16_t lo = hi;
18
+
19
+ hi -= lo;
20
+ if (hi == (int32_t)hi) {
21
+ tcg_out_addpcis(s, TCG_REG_TMP2, hi);
22
+ tcg_out32(s, ADDI | TAI(ret, TCG_REG_TMP2, lo));
23
+ return;
24
+ }
25
+ }
26
+
27
/* Load addresses within 2GB of TB with 2 (or rarely 3) insns. */
28
if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
29
tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
30
--
31
2.34.1
New patch
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.c.inc | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
10
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
11
tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
12
return;
13
}
14
+ if (have_isa_3_00) {
15
+ tcg_out_addpcis(s, TCG_REG_TMP2, 0);
16
+ new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0);
17
+ tcg_out32(s, LD | TAI(ret, TCG_REG_TMP2, 0));
18
+ return;
19
+ }
20
21
tmp = arg >> 31 >> 1;
22
tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
23
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
24
if (USE_REG_TB) {
25
rel = R_PPC_ADDR16;
26
add = ppc_tbrel_diff(s, NULL);
27
+ } else if (have_isa_3_00) {
28
+ tcg_out_addpcis(s, TCG_REG_TMP1, 0);
29
+ rel = R_PPC_REL14;
30
+ add = 0;
31
} else {
32
rel = R_PPC_ADDR32;
33
add = 0;
34
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
35
if (USE_REG_TB) {
36
tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
37
load_insn |= RA(TCG_REG_TB);
38
+ } else if (have_isa_3_00) {
39
+ tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
40
} else {
41
tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
42
tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
43
--
44
2.34.1
New patch
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.c.inc | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
10
@@ -XXX,XX +XXX,XX @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
11
static void tcg_out_goto_tb(TCGContext *s, int which)
12
{
13
uintptr_t ptr = get_jmp_target_addr(s, which);
14
+ int16_t lo;
15
16
/* Direct branch will be patched by tb_target_set_jmp_target. */
17
set_jmp_insn_offset(s, which);
18
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_tb(TCGContext *s, int which)
19
if (USE_REG_TB) {
20
ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr);
21
tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
22
+ } else if (have_isa_3_00) {
23
+ ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4;
24
+ lo = offset;
25
+ tcg_out_addpcis(s, TCG_REG_TMP1, offset - lo);
26
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
27
} else {
28
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr);
29
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr);
30
+ lo = ptr;
31
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - lo);
32
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
33
}
34
35
tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
36
--
37
2.34.1
New patch
PADDI can load 34-bit immediates and 34-bit pc-relative addresses.
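
For illustration, a sketch of what the new code in tcg_out_movi_int
below does with the two prefixed forms, i.e. the 'pli' and 'pla'
extended mnemonics of PADDI:

    if (have_isa_3_10) {
        if (arg == sextract64(arg, 0, 34)) {
            /* pli ret,arg  ==  paddi ret,0,arg,R=0 : 34-bit immediate */
            tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
            return;
        }
        tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
        if (tmp == sextract64(tmp, 0, 34)) {
            /* pla ret,disp  ==  paddi ret,0,disp,R=1 : pc-relative address */
            tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
            return;
        }
    }

The R bit selects whether the 34-bit operand is an absolute value or a
displacement from the prefixed insn's own address, and tcg_out_mls_d
inserts an alignment nop whenever the 8-byte prefixed insn would
otherwise cross a 64-byte boundary.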

Reviewed-by: Jordan Niethe <jniethe5@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.c.inc | 51 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
14
return true;
15
}
16
17
+/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
18
+static bool tcg_out_need_prefix_align(TCGContext *s)
19
+{
20
+ return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
21
+}
22
+
23
+static void tcg_out_prefix_align(TCGContext *s)
24
+{
25
+ if (tcg_out_need_prefix_align(s)) {
26
+ tcg_out32(s, NOP);
27
+ }
28
+}
29
+
30
+static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
31
+{
32
+ return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
33
+}
34
+
35
+/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
36
+static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
37
+ unsigned ra, tcg_target_long imm, bool r)
38
+{
39
+ tcg_insn_unit p, i;
40
+
41
+ p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
42
+ i = opc | TAI(rt, ra, imm);
43
+
44
+ tcg_out_prefix_align(s);
45
+ tcg_out32(s, p);
46
+ tcg_out32(s, i);
47
+}
48
+
49
static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
50
TCGReg base, tcg_target_long offset);
51
52
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
53
return;
54
}
55
56
+ /*
57
+ * Load values up to 34 bits, and pc-relative addresses,
58
+ * with one prefixed insn.
59
+ */
60
+ if (have_isa_3_10) {
61
+ if (arg == sextract64(arg, 0, 34)) {
62
+ /* pli ret,value = paddi ret,0,value,0 */
63
+ tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
64
+ return;
65
+ }
66
+
67
+ tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
68
+ if (tmp == sextract64(tmp, 0, 34)) {
69
+ /* pla ret,value = paddi ret,0,value,1 */
70
+ tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
71
+ return;
72
+ }
73
+ }
74
+
75
/* Load 32-bit immediates with two insns. Note that we've already
76
eliminated bare ADDIS, so we know both insns are required. */
77
if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
78
--
79
2.34.1
New patch
When the offset is out of range of the non-prefixed insn, but
fits the 34-bit immediate of the prefixed insn, use that.
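
The selection logic added to tcg_out_mem_long below boils down to the
following (a sketch, shown for the LD case only):

    /* For unaligned or large offsets, use the prefixed form. */
    if (have_isa_3_10
        && (offset != (int16_t)offset || (offset & align))
        && offset == sextract64(offset, 0, 34)) {
        /* e.g. pld rt,offset(base): 34-bit displacement, no index register */
        tcg_out_8ls_d(s, PLD, rt, base, offset, 0);
        return;
    }
    /* Otherwise fall through to the indexed (X-form) path as before. */

Note that the MLS:D group (lbz, lhz, lha, lwz, stb, sth, stw, addi)
keeps its un-prefixed primary opcode, while the 8LS:D group (plwa, pld,
pstd, plxsd, plxv, ...) uses a separate opcode space, hence the two
different emit helpers.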

Reviewed-by: Jordan Niethe <jniethe5@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.c.inc | 66 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
15
#define STDX XO31(149)
16
#define STQ XO62( 2)
17
18
+#define PLWA OPCD( 41)
19
+#define PLD OPCD( 57)
20
+#define PLXSD OPCD( 42)
21
+#define PLXV OPCD(25 * 2 + 1) /* force tx=1 */
22
+
23
+#define PSTD OPCD( 61)
24
+#define PSTXSD OPCD( 46)
25
+#define PSTXV OPCD(27 * 2 + 1) /* force sx=1 */
26
+
27
#define ADDIC OPCD( 12)
28
#define ADDI OPCD( 14)
29
#define ADDIS OPCD( 15)
30
@@ -XXX,XX +XXX,XX @@ static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
31
return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
32
}
33
34
+/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */
35
+static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
36
+ unsigned ra, tcg_target_long imm, bool r)
37
+{
38
+ tcg_insn_unit p, i;
39
+
40
+ p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff);
41
+ i = opc | TAI(rt, ra, imm);
42
+
43
+ tcg_out_prefix_align(s);
44
+ tcg_out32(s, p);
45
+ tcg_out32(s, i);
46
+}
47
+
48
/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
49
static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
50
unsigned ra, tcg_target_long imm, bool r)
51
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
52
break;
53
}
54
55
+ /* For unaligned or large offsets, use the prefixed form. */
56
+ if (have_isa_3_10
57
+ && (offset != (int16_t)offset || (offset & align))
58
+ && offset == sextract64(offset, 0, 34)) {
59
+ /*
60
+ * Note that the MLS:D insns retain their un-prefixed opcode,
61
+ * while the 8LS:D insns use a different opcode space.
62
+ */
63
+ switch (opi) {
64
+ case LBZ:
65
+ case LHZ:
66
+ case LHA:
67
+ case LWZ:
68
+ case STB:
69
+ case STH:
70
+ case STW:
71
+ case ADDI:
72
+ tcg_out_mls_d(s, opi, rt, base, offset, 0);
73
+ return;
74
+ case LWA:
75
+ tcg_out_8ls_d(s, PLWA, rt, base, offset, 0);
76
+ return;
77
+ case LD:
78
+ tcg_out_8ls_d(s, PLD, rt, base, offset, 0);
79
+ return;
80
+ case STD:
81
+ tcg_out_8ls_d(s, PSTD, rt, base, offset, 0);
82
+ return;
83
+ case LXSD:
84
+ tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0);
85
+ return;
86
+ case STXSD:
87
+ tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0);
88
+ return;
89
+ case LXV:
90
+ tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0);
91
+ return;
92
+ case STXV:
93
+ tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0);
94
+ return;
95
+ }
96
+ }
97
+
98
/* For unaligned, or very large offsets, use the indexed form. */
99
if (offset & align || offset != (int32_t)offset || opi == 0) {
100
if (rs == base) {
101
--
102
2.34.1
New patch
The prefixed instruction has a pc-relative form to use here.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.c.inc | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
12
@@ -XXX,XX +XXX,XX @@
13
#define ALL_GENERAL_REGS 0xffffffffu
14
#define ALL_VECTOR_REGS 0xffffffff00000000ull
15
16
+#ifndef R_PPC64_PCREL34
17
+#define R_PPC64_PCREL34 132
18
+#endif
19
+
20
#define have_isel (cpuinfo & CPUINFO_ISEL)
21
22
#ifndef CONFIG_SOFTMMU
23
@@ -XXX,XX +XXX,XX @@ static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
24
return false;
25
}
26
27
+static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
28
+{
29
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
30
+ ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
31
+
32
+ if (disp == sextract64(disp, 0, 34)) {
33
+ src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff);
34
+ src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff);
35
+ return true;
36
+ }
37
+ return false;
38
+}
39
+
40
/* test if a constant matches the constraint */
41
static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
42
{
43
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
44
return reloc_pc14(code_ptr, target);
45
case R_PPC_REL24:
46
return reloc_pc24(code_ptr, target);
47
+ case R_PPC64_PCREL34:
48
+ return reloc_pc34(code_ptr, target);
49
case R_PPC_ADDR16:
50
/*
51
* We are (slightly) abusing this relocation type. In particular,
52
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
53
tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
54
return;
55
}
56
+ if (have_isa_3_10) {
57
+ tcg_out_8ls_d(s, PLD, ret, 0, 0, 1);
58
+ new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0);
59
+ return;
60
+ }
61
if (have_isa_3_00) {
62
tcg_out_addpcis(s, TCG_REG_TMP2, 0);
63
new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0);
64
--
65
2.34.1
New patch
The prefixed instructions have a pc-relative form to use here.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.c.inc | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
12
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
13
if (USE_REG_TB) {
14
rel = R_PPC_ADDR16;
15
add = ppc_tbrel_diff(s, NULL);
16
+ } else if (have_isa_3_10) {
17
+ if (type == TCG_TYPE_V64) {
18
+ tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1);
19
+ new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0);
20
+ } else {
21
+ tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1);
22
+ new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val);
23
+ }
24
+ return;
25
} else if (have_isa_3_00) {
26
tcg_out_addpcis(s, TCG_REG_TMP1, 0);
27
rel = R_PPC_REL14;
28
--
29
2.34.1
New patch
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.c.inc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
10
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_tb(TCGContext *s, int which)
11
if (USE_REG_TB) {
12
ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr);
13
tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
14
+ } else if (have_isa_3_10) {
15
+ ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
16
+ tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
17
} else if (have_isa_3_00) {
18
ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4;
19
lo = offset;
20
--
21
2.34.1
New patch
This appears to slightly improve performance on power9/10.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.c.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
12
@@ -XXX,XX +XXX,XX @@
13
#define TCG_VEC_TMP2 TCG_REG_V1
14
15
#define TCG_REG_TB TCG_REG_R31
16
-#define USE_REG_TB (TCG_TARGET_REG_BITS == 64)
17
+#define USE_REG_TB (TCG_TARGET_REG_BITS == 64 && !have_isa_3_00)
18
19
/* Shorthand for size of a pointer. Avoid promotion to unsigned. */
20
#define SZP ((int)sizeof(void *))
21
--
22
2.34.1
New patch
Begin disconnecting CONFIG_SOFTMMU from !CONFIG_USER_ONLY.
Introduce a variable which can be set at startup to select
one method or another for user-only.
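
The resulting idiom (a sketch of the include/tcg/tcg.h hunk below, with
a hypothetical caller added for illustration) is that tcg_use_softmmu is
a real variable only for user-only builds and a compile-time constant
otherwise, so the unused arm folds away in system mode:

    #ifdef CONFIG_USER_ONLY
    extern bool tcg_use_softmmu;    /* selected at startup */
    #else
    #define tcg_use_softmmu true    /* system mode always uses the softmmu */
    #endif

    if (tcg_use_softmmu) {
        /* emit a TLB lookup on the fast path */
    } else {
        /* user-only: offset the guest address by guest_base directly */
    }

Backends can then replace '#ifdef CONFIG_SOFTMMU' blocks with plain
'if' statements, as the per-backend patches later in the series do.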

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h | 8 ++++++--
 tcg/tcg-op-ldst.c | 14 +++++++-------
 tcg/tcg.c | 9 ++++++---
 3 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
17
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
18
int nb_ops;
19
TCGType addr_type; /* TCG_TYPE_I32 or TCG_TYPE_I64 */
20
21
-#ifdef CONFIG_SOFTMMU
22
int page_mask;
23
uint8_t page_bits;
24
uint8_t tlb_dyn_max_bits;
25
-#endif
26
uint8_t insn_start_words;
27
TCGBar guest_mo;
28
29
@@ -XXX,XX +XXX,XX @@ static inline bool temp_readonly(TCGTemp *ts)
30
return ts->kind >= TEMP_FIXED;
31
}
32
33
+#ifdef CONFIG_USER_ONLY
34
+extern bool tcg_use_softmmu;
35
+#else
36
+#define tcg_use_softmmu true
37
+#endif
38
+
39
extern __thread TCGContext *tcg_ctx;
40
extern const void *tcg_code_gen_epilogue;
41
extern uintptr_t tcg_splitwx_diff;
42
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/tcg/tcg-op-ldst.c
45
+++ b/tcg/tcg-op-ldst.c
46
@@ -XXX,XX +XXX,XX @@
47
48
static void check_max_alignment(unsigned a_bits)
49
{
50
-#if defined(CONFIG_SOFTMMU)
51
/*
52
* The requested alignment cannot overlap the TLB flags.
53
* FIXME: Must keep the count up-to-date with "exec/cpu-all.h".
54
*/
55
- tcg_debug_assert(a_bits + 5 <= tcg_ctx->page_bits);
56
-#endif
57
+ if (tcg_use_softmmu) {
58
+ tcg_debug_assert(a_bits + 5 <= tcg_ctx->page_bits);
59
+ }
60
}
61
62
static MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
63
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
64
*/
65
static bool use_two_i64_for_i128(MemOp mop)
66
{
67
-#ifdef CONFIG_SOFTMMU
68
/* Two softmmu tlb lookups is larger than one function call. */
69
- return false;
70
-#else
71
+ if (tcg_use_softmmu) {
72
+ return false;
73
+ }
74
+
75
/*
76
* For user-only, two 64-bit operations may well be smaller than a call.
77
* Determine if that would be legal for the requested atomicity.
78
@@ -XXX,XX +XXX,XX @@ static bool use_two_i64_for_i128(MemOp mop)
79
default:
80
g_assert_not_reached();
81
}
82
-#endif
83
}
84
85
static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
86
diff --git a/tcg/tcg.c b/tcg/tcg.c
87
index XXXXXXX..XXXXXXX 100644
88
--- a/tcg/tcg.c
89
+++ b/tcg/tcg.c
90
@@ -XXX,XX +XXX,XX @@ static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
91
MemOp host_atom, bool allow_two_ops)
92
__attribute__((unused));
93
94
+#ifdef CONFIG_USER_ONLY
95
+bool tcg_use_softmmu;
96
+#endif
97
+
98
TCGContext tcg_init_ctx;
99
__thread TCGContext *tcg_ctx;
100
101
@@ -XXX,XX +XXX,XX @@ static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
102
return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
103
}
104
105
-#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
106
-static int tlb_mask_table_ofs(TCGContext *s, int which)
107
+static int __attribute__((unused))
108
+tlb_mask_table_ofs(TCGContext *s, int which)
109
{
110
return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
111
sizeof(CPUNegativeOffsetState));
112
}
113
-#endif
114
115
/* Signal overflow, starting over with fewer guest insns. */
116
static G_NORETURN
117
--
118
2.34.1
119
120
Provide a define to allow !tcg_use_softmmu code paths to
compile in system mode, but require elimination.
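
A sketch of why this works: in system mode tcg_use_softmmu is a
compile-time 'true', so every use of guest_base sits in dead
'!tcg_use_softmmu' code that the compiler must eliminate; the define
below turns any use that survives into a build failure rather than a
reference to a symbol that does not exist in system mode:

    #ifndef CONFIG_USER_ONLY
    /* Any reachable use of guest_base in system mode is a bug. */
    #define guest_base ({ qemu_build_not_reached(); (uintptr_t)0; })
    #endif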

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece);
 static int tcg_out_ldst_finalize(TCGContext *s);
 #endif
 
+#ifndef CONFIG_USER_ONLY
+#define guest_base ({ qemu_build_not_reached(); (uintptr_t)0; })
+#endif
+
 typedef struct TCGLdstHelperParam {
     TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
     unsigned ntmp;
--
2.34.1

New patch
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/arm/tcg-target.c.inc | 203 +++++++++++++++++++--------------------
 1 file changed, 97 insertions(+), 106 deletions(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
12
13
#define TCG_REG_TMP TCG_REG_R12
14
#define TCG_VEC_TMP TCG_REG_Q15
15
-#ifndef CONFIG_SOFTMMU
16
#define TCG_REG_GUEST_BASE TCG_REG_R11
17
-#endif
18
19
typedef enum {
20
COND_EQ = 0x0,
21
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
22
* r0-r3 will be overwritten when reading the tlb entry (system-mode only);
23
* r14 will be overwritten by the BLNE branching to the slow path.
24
*/
25
-#ifdef CONFIG_SOFTMMU
26
#define ALL_QLDST_REGS \
27
- (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
28
- (1 << TCG_REG_R2) | (1 << TCG_REG_R3) | \
29
- (1 << TCG_REG_R14)))
30
-#else
31
-#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_R14))
32
-#endif
33
+ (ALL_GENERAL_REGS & ~((tcg_use_softmmu ? 0xf : 0) | (1 << TCG_REG_R14)))
34
35
/*
36
* ARM immediates for ALU instructions are made of an unsigned 8-bit
37
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
38
MemOp opc = get_memop(oi);
39
unsigned a_mask;
40
41
-#ifdef CONFIG_SOFTMMU
42
- *h = (HostAddress){
43
- .cond = COND_AL,
44
- .base = addrlo,
45
- .index = TCG_REG_R1,
46
- .index_scratch = true,
47
- };
48
-#else
49
- *h = (HostAddress){
50
- .cond = COND_AL,
51
- .base = addrlo,
52
- .index = guest_base ? TCG_REG_GUEST_BASE : -1,
53
- .index_scratch = false,
54
- };
55
-#endif
56
+ if (tcg_use_softmmu) {
57
+ *h = (HostAddress){
58
+ .cond = COND_AL,
59
+ .base = addrlo,
60
+ .index = TCG_REG_R1,
61
+ .index_scratch = true,
62
+ };
63
+ } else {
64
+ *h = (HostAddress){
65
+ .cond = COND_AL,
66
+ .base = addrlo,
67
+ .index = guest_base ? TCG_REG_GUEST_BASE : -1,
68
+ .index_scratch = false,
69
+ };
70
+ }
71
72
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
73
a_mask = (1 << h->aa.align) - 1;
74
75
-#ifdef CONFIG_SOFTMMU
76
- int mem_index = get_mmuidx(oi);
77
- int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
78
- : offsetof(CPUTLBEntry, addr_write);
79
- int fast_off = tlb_mask_table_ofs(s, mem_index);
80
- unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
81
- TCGReg t_addr;
82
+ if (tcg_use_softmmu) {
83
+ int mem_index = get_mmuidx(oi);
84
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
85
+ : offsetof(CPUTLBEntry, addr_write);
86
+ int fast_off = tlb_mask_table_ofs(s, mem_index);
87
+ unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
88
+ TCGReg t_addr;
89
90
- ldst = new_ldst_label(s);
91
- ldst->is_ld = is_ld;
92
- ldst->oi = oi;
93
- ldst->addrlo_reg = addrlo;
94
- ldst->addrhi_reg = addrhi;
95
+ ldst = new_ldst_label(s);
96
+ ldst->is_ld = is_ld;
97
+ ldst->oi = oi;
98
+ ldst->addrlo_reg = addrlo;
99
+ ldst->addrhi_reg = addrhi;
100
101
- /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {r0,r1}. */
102
- QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
103
- QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
104
- tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
105
+ /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {r0,r1}. */
106
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
107
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
108
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
109
110
- /* Extract the tlb index from the address into R0. */
111
- tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
112
- SHIFT_IMM_LSR(s->page_bits - CPU_TLB_ENTRY_BITS));
113
+ /* Extract the tlb index from the address into R0. */
114
+ tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
115
+ SHIFT_IMM_LSR(s->page_bits - CPU_TLB_ENTRY_BITS));
116
117
- /*
118
- * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
119
- * Load the tlb comparator into R2/R3 and the fast path addend into R1.
120
- */
121
- QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
122
- if (cmp_off == 0) {
123
- if (s->addr_type == TCG_TYPE_I32) {
124
- tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
125
+ /*
126
+ * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
127
+ * Load the tlb comparator into R2/R3 and the fast path addend into R1.
128
+ */
129
+ QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
130
+ if (cmp_off == 0) {
131
+ if (s->addr_type == TCG_TYPE_I32) {
132
+ tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2,
133
+ TCG_REG_R1, TCG_REG_R0);
134
+ } else {
135
+ tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2,
136
+ TCG_REG_R1, TCG_REG_R0);
137
+ }
138
} else {
139
- tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
140
+ tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
141
+ TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
142
+ if (s->addr_type == TCG_TYPE_I32) {
143
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
144
+ } else {
145
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
146
+ }
147
}
148
- } else {
149
- tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
150
- TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
151
- if (s->addr_type == TCG_TYPE_I32) {
152
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
153
+
154
+ /* Load the tlb addend. */
155
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
156
+ offsetof(CPUTLBEntry, addend));
157
+
158
+ /*
159
+ * Check alignment, check comparators.
160
+ * Do this in 2-4 insns. Use MOVW for v7, if possible,
161
+ * to reduce the number of sequential conditional instructions.
162
+ * Almost all guests have at least 4k pages, which means that we need
163
+ * to clear at least 9 bits even for an 8-byte memory, which means it
164
+ * isn't worth checking for an immediate operand for BIC.
165
+ *
166
+ * For unaligned accesses, test the page of the last unit of alignment.
167
+ * This leaves the least significant alignment bits unchanged, and of
168
+ * course must be zero.
169
+ */
170
+ t_addr = addrlo;
171
+ if (a_mask < s_mask) {
172
+ t_addr = TCG_REG_R0;
173
+ tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
174
+ addrlo, s_mask - a_mask);
175
+ }
176
+ if (use_armv7_instructions && s->page_bits <= 16) {
177
+ tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(s->page_mask | a_mask));
178
+ tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
179
+ t_addr, TCG_REG_TMP, 0);
180
+ tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
181
+ TCG_REG_R2, TCG_REG_TMP, 0);
182
} else {
183
- tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
184
+ if (a_mask) {
185
+ tcg_debug_assert(a_mask <= 0xff);
186
+ tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
187
+ }
188
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
189
+ SHIFT_IMM_LSR(s->page_bits));
190
+ tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
191
+ 0, TCG_REG_R2, TCG_REG_TMP,
192
+ SHIFT_IMM_LSL(s->page_bits));
193
}
194
- }
195
196
- /* Load the tlb addend. */
197
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
198
- offsetof(CPUTLBEntry, addend));
199
-
200
- /*
201
- * Check alignment, check comparators.
202
- * Do this in 2-4 insns. Use MOVW for v7, if possible,
203
- * to reduce the number of sequential conditional instructions.
204
- * Almost all guests have at least 4k pages, which means that we need
205
- * to clear at least 9 bits even for an 8-byte memory, which means it
206
- * isn't worth checking for an immediate operand for BIC.
207
- *
208
- * For unaligned accesses, test the page of the last unit of alignment.
209
- * This leaves the least significant alignment bits unchanged, and of
210
- * course must be zero.
211
- */
212
- t_addr = addrlo;
213
- if (a_mask < s_mask) {
214
- t_addr = TCG_REG_R0;
215
- tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
216
- addrlo, s_mask - a_mask);
217
- }
218
- if (use_armv7_instructions && s->page_bits <= 16) {
219
- tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(s->page_mask | a_mask));
220
- tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
221
- t_addr, TCG_REG_TMP, 0);
222
- tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
223
- } else {
224
- if (a_mask) {
225
- tcg_debug_assert(a_mask <= 0xff);
226
- tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
227
+ if (s->addr_type != TCG_TYPE_I32) {
228
+ tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
229
}
230
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
231
- SHIFT_IMM_LSR(s->page_bits));
232
- tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
233
- 0, TCG_REG_R2, TCG_REG_TMP,
234
- SHIFT_IMM_LSL(s->page_bits));
235
- }
236
-
237
- if (s->addr_type != TCG_TYPE_I32) {
238
- tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
239
- }
240
-#else
241
- if (a_mask) {
242
+ } else if (a_mask) {
243
ldst = new_ldst_label(s);
244
ldst->is_ld = is_ld;
245
ldst->oi = oi;
246
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
247
/* tst addr, #mask */
248
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
249
}
250
-#endif
251
252
return ldst;
253
}
254
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
255
256
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
257
258
-#ifndef CONFIG_SOFTMMU
259
- if (guest_base) {
260
+ if (!tcg_use_softmmu && guest_base) {
261
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
262
tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
263
}
264
-#endif
265
266
tcg_out_b_reg(s, COND_AL, tcg_target_call_iarg_regs[1]);
267
268
--
269
2.34.1
270
271
New patch
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.c.inc | 177 +++++++++++++++++------------------
 1 file changed, 88 insertions(+), 89 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
12
#define TCG_REG_TMP2 TCG_REG_X30
13
#define TCG_VEC_TMP0 TCG_REG_V31
14
15
-#ifndef CONFIG_SOFTMMU
16
#define TCG_REG_GUEST_BASE TCG_REG_X28
17
-#endif
18
19
static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
20
{
21
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
22
s_bits == MO_128);
23
a_mask = (1 << h->aa.align) - 1;
24
25
-#ifdef CONFIG_SOFTMMU
26
- unsigned s_mask = (1u << s_bits) - 1;
27
- unsigned mem_index = get_mmuidx(oi);
28
- TCGReg addr_adj;
29
- TCGType mask_type;
30
- uint64_t compare_mask;
31
+ if (tcg_use_softmmu) {
32
+ unsigned s_mask = (1u << s_bits) - 1;
33
+ unsigned mem_index = get_mmuidx(oi);
34
+ TCGReg addr_adj;
35
+ TCGType mask_type;
36
+ uint64_t compare_mask;
37
38
- ldst = new_ldst_label(s);
39
- ldst->is_ld = is_ld;
40
- ldst->oi = oi;
41
- ldst->addrlo_reg = addr_reg;
42
-
43
- mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
44
- ? TCG_TYPE_I64 : TCG_TYPE_I32);
45
-
46
- /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
47
- QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
48
- QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
49
- tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
50
- tlb_mask_table_ofs(s, mem_index), 1, 0);
51
-
52
- /* Extract the TLB index from the address into X0. */
53
- tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
54
- TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
55
- s->page_bits - CPU_TLB_ENTRY_BITS);
56
-
57
- /* Add the tlb_table pointer, forming the CPUTLBEntry address in TMP1. */
58
- tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
59
-
60
- /* Load the tlb comparator into TMP0, and the fast path addend into TMP1. */
61
- QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
62
- tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
63
- is_ld ? offsetof(CPUTLBEntry, addr_read)
64
- : offsetof(CPUTLBEntry, addr_write));
65
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
66
- offsetof(CPUTLBEntry, addend));
67
-
68
- /*
69
- * For aligned accesses, we check the first byte and include the alignment
70
- * bits within the address. For unaligned access, we check that we don't
71
- * cross pages using the address of the last byte of the access.
72
- */
73
- if (a_mask >= s_mask) {
74
- addr_adj = addr_reg;
75
- } else {
76
- addr_adj = TCG_REG_TMP2;
77
- tcg_out_insn(s, 3401, ADDI, addr_type,
78
- addr_adj, addr_reg, s_mask - a_mask);
79
- }
80
- compare_mask = (uint64_t)s->page_mask | a_mask;
81
-
82
- /* Store the page mask part of the address into TMP2. */
83
- tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
84
- addr_adj, compare_mask);
85
-
86
- /* Perform the address comparison. */
87
- tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);
88
-
89
- /* If not equal, we jump to the slow path. */
90
- ldst->label_ptr[0] = s->code_ptr;
91
- tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
92
-
93
- h->base = TCG_REG_TMP1;
94
- h->index = addr_reg;
95
- h->index_ext = addr_type;
96
-#else
97
- if (a_mask) {
98
ldst = new_ldst_label(s);
99
-
100
ldst->is_ld = is_ld;
101
ldst->oi = oi;
102
ldst->addrlo_reg = addr_reg;
103
104
- /* tst addr, #mask */
105
- tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
106
+ mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
107
+ ? TCG_TYPE_I64 : TCG_TYPE_I32);
108
109
- /* b.ne slow_path */
110
+ /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
111
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
112
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
113
+ tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
114
+ tlb_mask_table_ofs(s, mem_index), 1, 0);
115
+
116
+ /* Extract the TLB index from the address into X0. */
117
+ tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
118
+ TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
119
+ s->page_bits - CPU_TLB_ENTRY_BITS);
120
+
121
+ /* Add the tlb_table pointer, forming the CPUTLBEntry address. */
122
+ tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
123
+
124
+ /* Load the tlb comparator into TMP0, and the fast path addend. */
125
+ QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
126
+ tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
127
+ is_ld ? offsetof(CPUTLBEntry, addr_read)
128
+ : offsetof(CPUTLBEntry, addr_write));
129
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
130
+ offsetof(CPUTLBEntry, addend));
131
+
132
+ /*
133
+ * For aligned accesses, we check the first byte and include
134
+ * the alignment bits within the address. For unaligned access,
135
+ * we check that we don't cross pages using the address of the
136
+ * last byte of the access.
137
+ */
138
+ if (a_mask >= s_mask) {
139
+ addr_adj = addr_reg;
140
+ } else {
141
+ addr_adj = TCG_REG_TMP2;
142
+ tcg_out_insn(s, 3401, ADDI, addr_type,
143
+ addr_adj, addr_reg, s_mask - a_mask);
144
+ }
145
+ compare_mask = (uint64_t)s->page_mask | a_mask;
146
+
147
+ /* Store the page mask part of the address into TMP2. */
148
+ tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
149
+ addr_adj, compare_mask);
150
+
151
+ /* Perform the address comparison. */
152
+ tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);
153
+
154
+ /* If not equal, we jump to the slow path. */
155
ldst->label_ptr[0] = s->code_ptr;
156
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
157
- }
158
159
- if (guest_base || addr_type == TCG_TYPE_I32) {
160
- h->base = TCG_REG_GUEST_BASE;
161
+ h->base = TCG_REG_TMP1;
162
h->index = addr_reg;
163
h->index_ext = addr_type;
164
} else {
165
- h->base = addr_reg;
166
- h->index = TCG_REG_XZR;
167
- h->index_ext = TCG_TYPE_I64;
168
+ if (a_mask) {
169
+ ldst = new_ldst_label(s);
170
+
171
+ ldst->is_ld = is_ld;
172
+ ldst->oi = oi;
173
+ ldst->addrlo_reg = addr_reg;
174
+
175
+ /* tst addr, #mask */
176
+ tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
177
+
178
+ /* b.ne slow_path */
179
+ ldst->label_ptr[0] = s->code_ptr;
180
+ tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
181
+ }
182
+
183
+ if (guest_base || addr_type == TCG_TYPE_I32) {
184
+ h->base = TCG_REG_GUEST_BASE;
185
+ h->index = addr_reg;
186
+ h->index_ext = addr_type;
187
+ } else {
188
+ h->base = addr_reg;
189
+ h->index = TCG_REG_XZR;
190
+ h->index_ext = TCG_TYPE_I64;
191
+ }
192
}
193
-#endif
194
195
return ldst;
196
}
197
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
198
tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
199
CPU_TEMP_BUF_NLONGS * sizeof(long));
200
201
-#if !defined(CONFIG_SOFTMMU)
202
- /*
203
- * Note that XZR cannot be encoded in the address base register slot,
204
- * as that actually encodes SP. Depending on the guest, we may need
205
- * to zero-extend the guest address via the address index register slot,
206
- * therefore we need to load even a zero guest base into a register.
207
- */
208
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
209
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
210
-#endif
211
+ if (!tcg_use_softmmu) {
212
+ /*
213
+ * Note that XZR cannot be encoded in the address base register slot,
214
+ * as that actually encodes SP. Depending on the guest, we may need
215
+ * to zero-extend the guest address via the address index register slot,
216
+ * therefore we need to load even a zero guest base into a register.
217
+ */
218
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
219
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
220
+ }
221
222
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
223
tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
224
--
225
2.34.1
226
227
New patch
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.c.inc | 198 +++++++++++++++++++-------------------
 1 file changed, 98 insertions(+), 100 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
12
# define ALL_VECTOR_REGS 0x00ff0000u
13
# define ALL_BYTEL_REGS 0x0000000fu
14
#endif
15
-#ifdef CONFIG_SOFTMMU
16
-# define SOFTMMU_RESERVE_REGS ((1 << TCG_REG_L0) | (1 << TCG_REG_L1))
17
-#else
18
-# define SOFTMMU_RESERVE_REGS 0
19
-#endif
20
+#define SOFTMMU_RESERVE_REGS \
21
+ (tcg_use_softmmu ? (1 << TCG_REG_L0) | (1 << TCG_REG_L1) : 0)
22
23
/* For 64-bit, we always know that CMOV is available. */
24
#if TCG_TARGET_REG_BITS == 64
25
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
26
return true;
27
}
28
29
-#ifndef CONFIG_SOFTMMU
30
+#ifdef CONFIG_USER_ONLY
31
static HostAddress x86_guest_base = {
32
.index = -1
33
};
34
@@ -XXX,XX +XXX,XX @@ static inline int setup_guest_base_seg(void)
35
}
36
return 0;
37
}
38
+#define setup_guest_base_seg setup_guest_base_seg
39
#elif defined(__x86_64__) && \
40
(defined (__FreeBSD__) || defined (__FreeBSD_kernel__))
41
# include <machine/sysarch.h>
42
@@ -XXX,XX +XXX,XX @@ static inline int setup_guest_base_seg(void)
43
}
44
return 0;
45
}
46
+#define setup_guest_base_seg setup_guest_base_seg
47
+#endif
48
#else
49
-static inline int setup_guest_base_seg(void)
50
-{
51
- return 0;
52
-}
53
-#endif /* setup_guest_base_seg */
54
-#endif /* !SOFTMMU */
55
+# define x86_guest_base (*(HostAddress *)({ qemu_build_not_reached(); NULL; }))
56
+#endif /* CONFIG_USER_ONLY */
57
+#ifndef setup_guest_base_seg
58
+# define setup_guest_base_seg() 0
59
+#endif
60
61
#define MIN_TLB_MASK_TABLE_OFS INT_MIN
62
63
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
64
MemOp s_bits = opc & MO_SIZE;
65
unsigned a_mask;
66
67
-#ifdef CONFIG_SOFTMMU
68
- h->index = TCG_REG_L0;
69
- h->ofs = 0;
70
- h->seg = 0;
71
-#else
72
- *h = x86_guest_base;
73
-#endif
74
+ if (tcg_use_softmmu) {
75
+ h->index = TCG_REG_L0;
76
+ h->ofs = 0;
77
+ h->seg = 0;
78
+ } else {
79
+ *h = x86_guest_base;
80
+ }
81
h->base = addrlo;
82
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
83
a_mask = (1 << h->aa.align) - 1;
84
85
-#ifdef CONFIG_SOFTMMU
86
- int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read)
87
- : offsetof(CPUTLBEntry, addr_write);
88
- TCGType ttype = TCG_TYPE_I32;
89
- TCGType tlbtype = TCG_TYPE_I32;
90
- int trexw = 0, hrexw = 0, tlbrexw = 0;
91
- unsigned mem_index = get_mmuidx(oi);
92
- unsigned s_mask = (1 << s_bits) - 1;
93
- int fast_ofs = tlb_mask_table_ofs(s, mem_index);
94
- int tlb_mask;
95
+ if (tcg_use_softmmu) {
96
+ int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read)
97
+ : offsetof(CPUTLBEntry, addr_write);
98
+ TCGType ttype = TCG_TYPE_I32;
99
+ TCGType tlbtype = TCG_TYPE_I32;
100
+ int trexw = 0, hrexw = 0, tlbrexw = 0;
101
+ unsigned mem_index = get_mmuidx(oi);
102
+ unsigned s_mask = (1 << s_bits) - 1;
103
+ int fast_ofs = tlb_mask_table_ofs(s, mem_index);
104
+ int tlb_mask;
105
106
- ldst = new_ldst_label(s);
107
- ldst->is_ld = is_ld;
108
- ldst->oi = oi;
109
- ldst->addrlo_reg = addrlo;
110
- ldst->addrhi_reg = addrhi;
111
+ ldst = new_ldst_label(s);
112
+ ldst->is_ld = is_ld;
113
+ ldst->oi = oi;
114
+ ldst->addrlo_reg = addrlo;
115
+ ldst->addrhi_reg = addrhi;
116
117
- if (TCG_TARGET_REG_BITS == 64) {
118
- ttype = s->addr_type;
119
- trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW);
120
- if (TCG_TYPE_PTR == TCG_TYPE_I64) {
121
- hrexw = P_REXW;
122
- if (s->page_bits + s->tlb_dyn_max_bits > 32) {
123
- tlbtype = TCG_TYPE_I64;
124
- tlbrexw = P_REXW;
125
+ if (TCG_TARGET_REG_BITS == 64) {
126
+ ttype = s->addr_type;
127
+ trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW);
128
+ if (TCG_TYPE_PTR == TCG_TYPE_I64) {
129
+ hrexw = P_REXW;
130
+ if (s->page_bits + s->tlb_dyn_max_bits > 32) {
131
+ tlbtype = TCG_TYPE_I64;
132
+ tlbrexw = P_REXW;
133
+ }
134
}
135
}
136
- }
137
138
- tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
139
- tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
140
- s->page_bits - CPU_TLB_ENTRY_BITS);
141
+ tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
142
+ tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
143
+ s->page_bits - CPU_TLB_ENTRY_BITS);
144
145
- tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
146
- fast_ofs + offsetof(CPUTLBDescFast, mask));
147
+ tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
148
+ fast_ofs + offsetof(CPUTLBDescFast, mask));
149
150
- tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0,
151
- fast_ofs + offsetof(CPUTLBDescFast, table));
152
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0,
153
+ fast_ofs + offsetof(CPUTLBDescFast, table));
154
155
- /*
156
- * If the required alignment is at least as large as the access, simply
157
- * copy the address and mask. For lesser alignments, check that we don't
158
- * cross pages for the complete access.
159
- */
160
- if (a_mask >= s_mask) {
161
- tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
162
- } else {
163
- tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
164
- addrlo, s_mask - a_mask);
165
- }
166
- tlb_mask = s->page_mask | a_mask;
167
- tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
168
+ /*
169
+ * If the required alignment is at least as large as the access,
170
+ * simply copy the address and mask. For lesser alignments,
171
+ * check that we don't cross pages for the complete access.
172
+ */
173
+ if (a_mask >= s_mask) {
174
+ tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
175
+ } else {
176
+ tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
177
+ addrlo, s_mask - a_mask);
178
+ }
179
+ tlb_mask = s->page_mask | a_mask;
180
+ tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
181
182
- /* cmp 0(TCG_REG_L0), TCG_REG_L1 */
183
- tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
184
- TCG_REG_L1, TCG_REG_L0, cmp_ofs);
185
-
186
- /* jne slow_path */
187
- tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
188
- ldst->label_ptr[0] = s->code_ptr;
189
- s->code_ptr += 4;
190
-
191
- if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I64) {
192
- /* cmp 4(TCG_REG_L0), addrhi */
193
- tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, cmp_ofs + 4);
194
+ /* cmp 0(TCG_REG_L0), TCG_REG_L1 */
195
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
196
+ TCG_REG_L1, TCG_REG_L0, cmp_ofs);
197
198
/* jne slow_path */
199
tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
200
- ldst->label_ptr[1] = s->code_ptr;
201
+ ldst->label_ptr[0] = s->code_ptr;
202
s->code_ptr += 4;
203
- }
204
205
- /* TLB Hit. */
206
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0,
207
- offsetof(CPUTLBEntry, addend));
208
-#else
209
- if (a_mask) {
210
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I64) {
211
+ /* cmp 4(TCG_REG_L0), addrhi */
212
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi,
213
+ TCG_REG_L0, cmp_ofs + 4);
214
+
215
+ /* jne slow_path */
216
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
217
+ ldst->label_ptr[1] = s->code_ptr;
218
+ s->code_ptr += 4;
219
+ }
220
+
221
+ /* TLB Hit. */
222
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0,
223
+ offsetof(CPUTLBEntry, addend));
224
+ } else if (a_mask) {
225
ldst = new_ldst_label(s);
226
227
ldst->is_ld = is_ld;
228
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
229
ldst->label_ptr[0] = s->code_ptr;
230
s->code_ptr += 4;
231
}
232
-#endif
233
234
return ldst;
235
}
236
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
237
tcg_out_push(s, tcg_target_callee_save_regs[i]);
238
}
239
240
-#if TCG_TARGET_REG_BITS == 32
241
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
242
- (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
243
- tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
244
- /* jmp *tb. */
245
- tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
246
- (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
247
- + stack_addend);
248
-#else
249
-# if !defined(CONFIG_SOFTMMU)
250
- if (guest_base) {
251
+ if (!tcg_use_softmmu && guest_base) {
252
int seg = setup_guest_base_seg();
253
if (seg != 0) {
254
x86_guest_base.seg = seg;
255
} else if (guest_base == (int32_t)guest_base) {
256
x86_guest_base.ofs = guest_base;
257
} else {
258
+ assert(TCG_TARGET_REG_BITS == 64);
259
/* Choose R12 because, as a base, it requires a SIB byte. */
260
x86_guest_base.index = TCG_REG_R12;
261
tcg_out_movi(s, TCG_TYPE_PTR, x86_guest_base.index, guest_base);
262
tcg_regset_set_reg(s->reserved_regs, x86_guest_base.index);
263
}
264
}
265
-# endif
266
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
267
- tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
268
- /* jmp *tb. */
269
- tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
270
-#endif
271
+
272
+ if (TCG_TARGET_REG_BITS == 32) {
273
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
274
+ (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
275
+ tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
276
+ /* jmp *tb. */
277
+ tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
278
+ (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
279
+ + stack_addend);
280
+ } else {
281
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
282
+ tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
283
+ /* jmp *tb. */
284
+ tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
285
+ }
286
287
/*
288
* Return path for goto_ptr. Set return value to 0, a-la exit_tb,
289
--
290
2.34.1
291
292
New patch
1
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/loongarch64/tcg-target.c.inc | 126 +++++++++++++++----------------
5
1 file changed, 61 insertions(+), 65 deletions(-)
1
6
7
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/loongarch64/tcg-target.c.inc
10
+++ b/tcg/loongarch64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
12
return TCG_REG_A0 + slot;
13
}
14
15
-#ifndef CONFIG_SOFTMMU
16
-#define USE_GUEST_BASE (guest_base != 0)
17
#define TCG_GUEST_BASE_REG TCG_REG_S1
18
-#endif
19
20
#define TCG_CT_CONST_ZERO 0x100
21
#define TCG_CT_CONST_S12 0x200
22
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
23
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
24
a_bits = h->aa.align;
25
26
-#ifdef CONFIG_SOFTMMU
27
- unsigned s_bits = opc & MO_SIZE;
28
- int mem_index = get_mmuidx(oi);
29
- int fast_ofs = tlb_mask_table_ofs(s, mem_index);
30
- int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
31
- int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
32
+ if (tcg_use_softmmu) {
33
+ unsigned s_bits = opc & MO_SIZE;
34
+ int mem_index = get_mmuidx(oi);
35
+ int fast_ofs = tlb_mask_table_ofs(s, mem_index);
36
+ int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
37
+ int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
38
39
- ldst = new_ldst_label(s);
40
- ldst->is_ld = is_ld;
41
- ldst->oi = oi;
42
- ldst->addrlo_reg = addr_reg;
43
-
44
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
45
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
46
-
47
- tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg,
48
- s->page_bits - CPU_TLB_ENTRY_BITS);
49
- tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
50
- tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
51
-
52
- /* Load the tlb comparator and the addend. */
53
- QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
54
- tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
55
- is_ld ? offsetof(CPUTLBEntry, addr_read)
56
- : offsetof(CPUTLBEntry, addr_write));
57
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
58
- offsetof(CPUTLBEntry, addend));
59
-
60
- /*
61
- * For aligned accesses, we check the first byte and include the alignment
62
- * bits within the address. For unaligned access, we check that we don't
63
- * cross pages using the address of the last byte of the access.
64
- */
65
- if (a_bits < s_bits) {
66
- unsigned a_mask = (1u << a_bits) - 1;
67
- unsigned s_mask = (1u << s_bits) - 1;
68
- tcg_out_addi(s, addr_type, TCG_REG_TMP1, addr_reg, s_mask - a_mask);
69
- } else {
70
- tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg);
71
- }
72
- tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO,
73
- a_bits, s->page_bits - 1);
74
-
75
- /* Compare masked address with the TLB entry. */
76
- ldst->label_ptr[0] = s->code_ptr;
77
- tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);
78
-
79
- h->index = TCG_REG_TMP2;
80
-#else
81
- if (a_bits) {
82
ldst = new_ldst_label(s);
83
-
84
ldst->is_ld = is_ld;
85
ldst->oi = oi;
86
ldst->addrlo_reg = addr_reg;
87
88
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
89
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
90
+
91
+ tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg,
92
+ s->page_bits - CPU_TLB_ENTRY_BITS);
93
+ tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
94
+ tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
95
+
96
+ /* Load the tlb comparator and the addend. */
97
+ QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
98
+ tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
99
+ is_ld ? offsetof(CPUTLBEntry, addr_read)
100
+ : offsetof(CPUTLBEntry, addr_write));
101
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
102
+ offsetof(CPUTLBEntry, addend));
103
+
104
/*
105
- * Without micro-architecture details, we don't know which of
106
- * bstrpick or andi is faster, so use bstrpick as it's not
107
- * constrained by imm field width. Not to say alignments >= 2^12
108
- * are going to happen any time soon.
109
+ * For aligned accesses, we check the first byte and include the
110
+ * alignment bits within the address. For unaligned access, we
111
+ * check that we don't cross pages using the address of the last
112
+ * byte of the access.
113
*/
114
- tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1);
115
+ if (a_bits < s_bits) {
116
+ unsigned a_mask = (1u << a_bits) - 1;
117
+ unsigned s_mask = (1u << s_bits) - 1;
118
+ tcg_out_addi(s, addr_type, TCG_REG_TMP1, addr_reg, s_mask - a_mask);
119
+ } else {
120
+ tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg);
121
+ }
122
+ tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO,
123
+ a_bits, s->page_bits - 1);
124
125
+ /* Compare masked address with the TLB entry. */
126
ldst->label_ptr[0] = s->code_ptr;
127
- tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0);
128
- }
129
+ tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);
130
131
- h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
132
-#endif
133
+ h->index = TCG_REG_TMP2;
134
+ } else {
135
+ if (a_bits) {
136
+ ldst = new_ldst_label(s);
137
+
138
+ ldst->is_ld = is_ld;
139
+ ldst->oi = oi;
140
+ ldst->addrlo_reg = addr_reg;
141
+
142
+ /*
143
+ * Without micro-architecture details, we don't know which of
144
+ * bstrpick or andi is faster, so use bstrpick as it's not
145
+ * constrained by imm field width. Not to say alignments >= 2^12
146
+ * are going to happen any time soon.
147
+ */
148
+ tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1);
149
+
150
+ ldst->label_ptr[0] = s->code_ptr;
151
+ tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0);
152
+ }
153
+
154
+ h->index = guest_base ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
155
+ }
156
157
if (addr_type == TCG_TYPE_I32) {
158
h->base = TCG_REG_TMP0;
159
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
160
TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
161
}
162
163
-#if !defined(CONFIG_SOFTMMU)
164
- if (USE_GUEST_BASE) {
165
+ if (!tcg_use_softmmu && guest_base) {
166
tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
167
tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
168
}
169
-#endif
170
171
/* Call generated code */
172
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
173
--
174
2.34.1
175
176
New patch
1
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/mips/tcg-target.c.inc | 231 +++++++++++++++++++-------------------
5
1 file changed, 113 insertions(+), 118 deletions(-)
1
6
7
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/mips/tcg-target.c.inc
10
+++ b/tcg/mips/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
12
#define TCG_TMP2 TCG_REG_T8
13
#define TCG_TMP3 TCG_REG_T7
14
15
-#ifndef CONFIG_SOFTMMU
16
#define TCG_GUEST_BASE_REG TCG_REG_S7
17
-#endif
18
#if TCG_TARGET_REG_BITS == 64
19
#define TCG_REG_TB TCG_REG_S6
20
#else
21
-#define TCG_REG_TB (qemu_build_not_reached(), TCG_REG_ZERO)
22
+#define TCG_REG_TB ({ qemu_build_not_reached(); TCG_REG_ZERO; })
23
#endif
24
25
/* check if we really need so many registers :P */
26
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
27
a_bits = h->aa.align;
28
a_mask = (1 << a_bits) - 1;
29
30
-#ifdef CONFIG_SOFTMMU
31
- unsigned s_mask = (1 << s_bits) - 1;
32
- int mem_index = get_mmuidx(oi);
33
- int fast_off = tlb_mask_table_ofs(s, mem_index);
34
- int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
35
- int table_off = fast_off + offsetof(CPUTLBDescFast, table);
36
- int add_off = offsetof(CPUTLBEntry, addend);
37
- int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
38
- : offsetof(CPUTLBEntry, addr_write);
39
+ if (tcg_use_softmmu) {
40
+ unsigned s_mask = (1 << s_bits) - 1;
41
+ int mem_index = get_mmuidx(oi);
42
+ int fast_off = tlb_mask_table_ofs(s, mem_index);
43
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
44
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
45
+ int add_off = offsetof(CPUTLBEntry, addend);
46
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
47
+ : offsetof(CPUTLBEntry, addr_write);
48
49
- ldst = new_ldst_label(s);
50
- ldst->is_ld = is_ld;
51
- ldst->oi = oi;
52
- ldst->addrlo_reg = addrlo;
53
- ldst->addrhi_reg = addrhi;
54
-
55
- /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
56
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
57
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
58
-
59
- /* Extract the TLB index from the address into TMP3. */
60
- if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
61
- tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, addrlo,
62
- s->page_bits - CPU_TLB_ENTRY_BITS);
63
- } else {
64
- tcg_out_dsrl(s, TCG_TMP3, addrlo,
65
- s->page_bits - CPU_TLB_ENTRY_BITS);
66
- }
67
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
68
-
69
- /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */
70
- tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
71
-
72
- if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
73
- /* Load the (low half) tlb comparator. */
74
- tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_TMP3,
75
- cmp_off + HOST_BIG_ENDIAN * 4);
76
- } else {
77
- tcg_out_ld(s, TCG_TYPE_I64, TCG_TMP0, TCG_TMP3, cmp_off);
78
- }
79
-
80
- if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
81
- /* Load the tlb addend for the fast path. */
82
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
83
- }
84
-
85
- /*
86
- * Mask the page bits, keeping the alignment bits to compare against.
87
- * For unaligned accesses, compare against the end of the access to
88
- * verify that it does not cross a page boundary.
89
- */
90
- tcg_out_movi(s, addr_type, TCG_TMP1, s->page_mask | a_mask);
91
- if (a_mask < s_mask) {
92
- if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
93
- tcg_out_opc_imm(s, OPC_ADDIU, TCG_TMP2, addrlo, s_mask - a_mask);
94
- } else {
95
- tcg_out_opc_imm(s, OPC_DADDIU, TCG_TMP2, addrlo, s_mask - a_mask);
96
- }
97
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
98
- } else {
99
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
100
- }
101
-
102
- /* Zero extend a 32-bit guest address for a 64-bit host. */
103
- if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
104
- tcg_out_ext32u(s, TCG_TMP2, addrlo);
105
- addrlo = TCG_TMP2;
106
- }
107
-
108
- ldst->label_ptr[0] = s->code_ptr;
109
- tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
110
-
111
- /* Load and test the high half tlb comparator. */
112
- if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
113
- /* delay slot */
114
- tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
115
-
116
- /* Load the tlb addend for the fast path. */
117
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
118
-
119
- ldst->label_ptr[1] = s->code_ptr;
120
- tcg_out_opc_br(s, OPC_BNE, addrhi, TCG_TMP0);
121
- }
122
-
123
- /* delay slot */
124
- base = TCG_TMP3;
125
- tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP3, addrlo);
126
-#else
127
- if (a_mask && (use_mips32r6_instructions || a_bits != s_bits)) {
128
ldst = new_ldst_label(s);
129
-
130
ldst->is_ld = is_ld;
131
ldst->oi = oi;
132
ldst->addrlo_reg = addrlo;
133
ldst->addrhi_reg = addrhi;
134
135
- /* We are expecting a_bits to max out at 7, much lower than ANDI. */
136
- tcg_debug_assert(a_bits < 16);
137
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask);
138
+ /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
139
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
140
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
141
+
142
+ /* Extract the TLB index from the address into TMP3. */
143
+ if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
144
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, addrlo,
145
+ s->page_bits - CPU_TLB_ENTRY_BITS);
146
+ } else {
147
+ tcg_out_dsrl(s, TCG_TMP3, addrlo,
148
+ s->page_bits - CPU_TLB_ENTRY_BITS);
149
+ }
150
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
151
+
152
+ /* Add the tlb_table pointer, creating the CPUTLBEntry address. */
153
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
154
+
155
+ if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
156
+ /* Load the (low half) tlb comparator. */
157
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_TMP3,
158
+ cmp_off + HOST_BIG_ENDIAN * 4);
159
+ } else {
160
+ tcg_out_ld(s, TCG_TYPE_I64, TCG_TMP0, TCG_TMP3, cmp_off);
161
+ }
162
+
163
+ if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
164
+ /* Load the tlb addend for the fast path. */
165
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
166
+ }
167
+
168
+ /*
169
+ * Mask the page bits, keeping the alignment bits to compare against.
170
+ * For unaligned accesses, compare against the end of the access to
171
+ * verify that it does not cross a page boundary.
172
+ */
173
+ tcg_out_movi(s, addr_type, TCG_TMP1, s->page_mask | a_mask);
174
+ if (a_mask < s_mask) {
175
+ tcg_out_opc_imm(s, (TCG_TARGET_REG_BITS == 32
176
+ || addr_type == TCG_TYPE_I32
177
+ ? OPC_ADDIU : OPC_DADDIU),
178
+ TCG_TMP2, addrlo, s_mask - a_mask);
179
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
180
+ } else {
181
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
182
+ }
183
+
184
+ /* Zero extend a 32-bit guest address for a 64-bit host. */
185
+ if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
186
+ tcg_out_ext32u(s, TCG_TMP2, addrlo);
187
+ addrlo = TCG_TMP2;
188
+ }
189
190
ldst->label_ptr[0] = s->code_ptr;
191
- if (use_mips32r6_instructions) {
192
- tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0);
193
- } else {
194
- tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO);
195
- tcg_out_nop(s);
196
- }
197
- }
198
+ tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
199
200
- base = addrlo;
201
- if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
202
- tcg_out_ext32u(s, TCG_REG_A0, base);
203
- base = TCG_REG_A0;
204
- }
205
- if (guest_base) {
206
- if (guest_base == (int16_t)guest_base) {
207
- tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
208
- } else {
209
- tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
210
- TCG_GUEST_BASE_REG);
211
+ /* Load and test the high half tlb comparator. */
212
+ if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
213
+ /* delay slot */
214
+ tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
215
+
216
+ /* Load the tlb addend for the fast path. */
217
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
218
+
219
+ ldst->label_ptr[1] = s->code_ptr;
220
+ tcg_out_opc_br(s, OPC_BNE, addrhi, TCG_TMP0);
221
+ }
222
+
223
+ /* delay slot */
224
+ base = TCG_TMP3;
225
+ tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP3, addrlo);
226
+ } else {
227
+ if (a_mask && (use_mips32r6_instructions || a_bits != s_bits)) {
228
+ ldst = new_ldst_label(s);
229
+
230
+ ldst->is_ld = is_ld;
231
+ ldst->oi = oi;
232
+ ldst->addrlo_reg = addrlo;
233
+ ldst->addrhi_reg = addrhi;
234
+
235
+ /* We are expecting a_bits to max out at 7, much lower than ANDI. */
236
+ tcg_debug_assert(a_bits < 16);
237
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask);
238
+
239
+ ldst->label_ptr[0] = s->code_ptr;
240
+ if (use_mips32r6_instructions) {
241
+ tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0);
242
+ } else {
243
+ tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO);
244
+ tcg_out_nop(s);
245
+ }
246
+ }
247
+
248
+ base = addrlo;
249
+ if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
250
+ tcg_out_ext32u(s, TCG_REG_A0, base);
251
+ base = TCG_REG_A0;
252
+ }
253
+ if (guest_base) {
254
+ if (guest_base == (int16_t)guest_base) {
255
+ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
256
+ } else {
257
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
258
+ TCG_GUEST_BASE_REG);
259
+ }
260
+ base = TCG_REG_A0;
261
}
262
- base = TCG_REG_A0;
263
}
264
-#endif
265
266
h->base = base;
267
return ldst;
268
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
269
TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
270
}
271
272
-#ifndef CONFIG_SOFTMMU
273
- if (guest_base != (int16_t)guest_base) {
274
+ if (!tcg_use_softmmu && guest_base != (int16_t)guest_base) {
275
/*
276
* The function call abi for n32 and n64 will have loaded $25 (t9)
277
* with the address of the prologue, so we can use that instead
278
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
279
TCG_TARGET_REG_BITS == 64 ? TCG_REG_T9 : 0);
280
tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
281
}
282
-#endif
283
284
if (TCG_TARGET_REG_BITS == 64) {
285
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
286
--
287
2.34.1
288
289
New patch
1
Fix TCG_GUEST_BASE_REG to use 'TCG_REG_R30' instead of '30'.
1
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/ppc/tcg-target.c.inc | 284 ++++++++++++++++++++-------------------
7
1 file changed, 143 insertions(+), 141 deletions(-)
8
9
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/ppc/tcg-target.c.inc
12
+++ b/tcg/ppc/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@
14
15
#define have_isel (cpuinfo & CPUINFO_ISEL)
16
17
-#ifndef CONFIG_SOFTMMU
18
-#define TCG_GUEST_BASE_REG 30
19
-#endif
20
+#define TCG_GUEST_BASE_REG TCG_REG_R30
21
22
#ifdef CONFIG_DEBUG_TCG
23
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
24
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
25
s_bits == MO_128);
26
a_bits = h->aa.align;
27
28
-#ifdef CONFIG_SOFTMMU
29
- int mem_index = get_mmuidx(oi);
30
- int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
31
- : offsetof(CPUTLBEntry, addr_write);
32
- int fast_off = tlb_mask_table_ofs(s, mem_index);
33
- int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
34
- int table_off = fast_off + offsetof(CPUTLBDescFast, table);
35
+ if (tcg_use_softmmu) {
36
+ int mem_index = get_mmuidx(oi);
37
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
38
+ : offsetof(CPUTLBEntry, addr_write);
39
+ int fast_off = tlb_mask_table_ofs(s, mem_index);
40
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
41
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
42
43
- ldst = new_ldst_label(s);
44
- ldst->is_ld = is_ld;
45
- ldst->oi = oi;
46
- ldst->addrlo_reg = addrlo;
47
- ldst->addrhi_reg = addrhi;
48
-
49
- /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
50
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
51
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
52
-
53
- /* Extract the page index, shifted into place for tlb index. */
54
- if (TCG_TARGET_REG_BITS == 32) {
55
- tcg_out_shri32(s, TCG_REG_R0, addrlo,
56
- s->page_bits - CPU_TLB_ENTRY_BITS);
57
- } else {
58
- tcg_out_shri64(s, TCG_REG_R0, addrlo,
59
- s->page_bits - CPU_TLB_ENTRY_BITS);
60
- }
61
- tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
62
-
63
- /*
64
- * Load the (low part) TLB comparator into TMP2.
65
- * For 64-bit host, always load the entire 64-bit slot for simplicity.
66
- * We will ignore the high bits with tcg_out_cmp(..., addr_type).
67
- */
68
- if (TCG_TARGET_REG_BITS == 64) {
69
- if (cmp_off == 0) {
70
- tcg_out32(s, LDUX | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
71
- } else {
72
- tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
73
- tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off);
74
- }
75
- } else if (cmp_off == 0 && !HOST_BIG_ENDIAN) {
76
- tcg_out32(s, LWZUX | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
77
- } else {
78
- tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
79
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
80
- cmp_off + 4 * HOST_BIG_ENDIAN);
81
- }
82
-
83
- /*
84
- * Load the TLB addend for use on the fast path.
85
- * Do this asap to minimize any load use delay.
86
- */
87
- if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
88
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
89
- offsetof(CPUTLBEntry, addend));
90
- }
91
-
92
- /* Clear the non-page, non-alignment bits from the address in R0. */
93
- if (TCG_TARGET_REG_BITS == 32) {
94
- /*
95
- * We don't support unaligned accesses on 32-bits.
96
- * Preserve the bottom bits and thus trigger a comparison
97
- * failure on unaligned accesses.
98
- */
99
- if (a_bits < s_bits) {
100
- a_bits = s_bits;
101
- }
102
- tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
103
- (32 - a_bits) & 31, 31 - s->page_bits);
104
- } else {
105
- TCGReg t = addrlo;
106
-
107
- /*
108
- * If the access is unaligned, we need to make sure we fail if we
109
- * cross a page boundary. The trick is to add the access size-1
110
- * to the address before masking the low bits. That will make the
111
- * address overflow to the next page if we cross a page boundary,
112
- * which will then force a mismatch of the TLB compare.
113
- */
114
- if (a_bits < s_bits) {
115
- unsigned a_mask = (1 << a_bits) - 1;
116
- unsigned s_mask = (1 << s_bits) - 1;
117
- tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
118
- t = TCG_REG_R0;
119
- }
120
-
121
- /* Mask the address for the requested alignment. */
122
- if (addr_type == TCG_TYPE_I32) {
123
- tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
124
- (32 - a_bits) & 31, 31 - s->page_bits);
125
- } else if (a_bits == 0) {
126
- tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
127
- } else {
128
- tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
129
- 64 - s->page_bits, s->page_bits - a_bits);
130
- tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
131
- }
132
- }
133
-
134
- if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
135
- /* Low part comparison into cr7. */
136
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
137
- 0, 7, TCG_TYPE_I32);
138
-
139
- /* Load the high part TLB comparator into TMP2. */
140
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
141
- cmp_off + 4 * !HOST_BIG_ENDIAN);
142
-
143
- /* Load addend, deferred for this case. */
144
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
145
- offsetof(CPUTLBEntry, addend));
146
-
147
- /* High part comparison into cr6. */
148
- tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2, 0, 6, TCG_TYPE_I32);
149
-
150
- /* Combine comparisons into cr7. */
151
- tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
152
- } else {
153
- /* Full comparison into cr7. */
154
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 0, 7, addr_type);
155
- }
156
-
157
- /* Load a pointer into the current opcode w/conditional branch-link. */
158
- ldst->label_ptr[0] = s->code_ptr;
159
- tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
160
-
161
- h->base = TCG_REG_TMP1;
162
-#else
163
- if (a_bits) {
164
ldst = new_ldst_label(s);
165
ldst->is_ld = is_ld;
166
ldst->oi = oi;
167
ldst->addrlo_reg = addrlo;
168
ldst->addrhi_reg = addrhi;
169
170
- /* We are expecting a_bits to max out at 7, much lower than ANDI. */
171
- tcg_debug_assert(a_bits < 16);
172
- tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1));
173
+ /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
174
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
175
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
176
177
+ /* Extract the page index, shifted into place for tlb index. */
178
+ if (TCG_TARGET_REG_BITS == 32) {
179
+ tcg_out_shri32(s, TCG_REG_R0, addrlo,
180
+ s->page_bits - CPU_TLB_ENTRY_BITS);
181
+ } else {
182
+ tcg_out_shri64(s, TCG_REG_R0, addrlo,
183
+ s->page_bits - CPU_TLB_ENTRY_BITS);
184
+ }
185
+ tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
186
+
187
+ /*
188
+ * Load the (low part) TLB comparator into TMP2.
189
+ * For 64-bit host, always load the entire 64-bit slot for simplicity.
190
+ * We will ignore the high bits with tcg_out_cmp(..., addr_type).
191
+ */
192
+ if (TCG_TARGET_REG_BITS == 64) {
193
+ if (cmp_off == 0) {
194
+ tcg_out32(s, LDUX | TAB(TCG_REG_TMP2,
195
+ TCG_REG_TMP1, TCG_REG_TMP2));
196
+ } else {
197
+ tcg_out32(s, ADD | TAB(TCG_REG_TMP1,
198
+ TCG_REG_TMP1, TCG_REG_TMP2));
199
+ tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_TMP2,
200
+ TCG_REG_TMP1, cmp_off);
201
+ }
202
+ } else if (cmp_off == 0 && !HOST_BIG_ENDIAN) {
203
+ tcg_out32(s, LWZUX | TAB(TCG_REG_TMP2,
204
+ TCG_REG_TMP1, TCG_REG_TMP2));
205
+ } else {
206
+ tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
207
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
208
+ cmp_off + 4 * HOST_BIG_ENDIAN);
209
+ }
210
+
211
+ /*
212
+ * Load the TLB addend for use on the fast path.
213
+ * Do this asap to minimize any load use delay.
214
+ */
215
+ if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
216
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
217
+ offsetof(CPUTLBEntry, addend));
218
+ }
219
+
220
+ /* Clear the non-page, non-alignment bits from the address in R0. */
221
+ if (TCG_TARGET_REG_BITS == 32) {
222
+ /*
223
+ * We don't support unaligned accesses on 32-bits.
224
+ * Preserve the bottom bits and thus trigger a comparison
225
+ * failure on unaligned accesses.
226
+ */
227
+ if (a_bits < s_bits) {
228
+ a_bits = s_bits;
229
+ }
230
+ tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
231
+ (32 - a_bits) & 31, 31 - s->page_bits);
232
+ } else {
233
+ TCGReg t = addrlo;
234
+
235
+ /*
236
+ * If the access is unaligned, we need to make sure we fail if we
237
+ * cross a page boundary. The trick is to add the access size-1
238
+ * to the address before masking the low bits. That will make the
239
+ * address overflow to the next page if we cross a page boundary,
240
+ * which will then force a mismatch of the TLB compare.
241
+ */
242
+ if (a_bits < s_bits) {
243
+ unsigned a_mask = (1 << a_bits) - 1;
244
+ unsigned s_mask = (1 << s_bits) - 1;
245
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
246
+ t = TCG_REG_R0;
247
+ }
248
+
249
+ /* Mask the address for the requested alignment. */
250
+ if (addr_type == TCG_TYPE_I32) {
251
+ tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
252
+ (32 - a_bits) & 31, 31 - s->page_bits);
253
+ } else if (a_bits == 0) {
254
+ tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
255
+ } else {
256
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
257
+ 64 - s->page_bits, s->page_bits - a_bits);
258
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
259
+ }
260
+ }
261
+
262
+ if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
263
+ /* Low part comparison into cr7. */
264
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
265
+ 0, 7, TCG_TYPE_I32);
266
+
267
+ /* Load the high part TLB comparator into TMP2. */
268
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
269
+ cmp_off + 4 * !HOST_BIG_ENDIAN);
270
+
271
+ /* Load addend, deferred for this case. */
272
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
273
+ offsetof(CPUTLBEntry, addend));
274
+
275
+ /* High part comparison into cr6. */
276
+ tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2,
277
+ 0, 6, TCG_TYPE_I32);
278
+
279
+ /* Combine comparisons into cr7. */
280
+ tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
281
+ } else {
282
+ /* Full comparison into cr7. */
283
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
284
+ 0, 7, addr_type);
285
+ }
286
+
287
+ /* Load a pointer into the current opcode w/conditional branch-link. */
288
ldst->label_ptr[0] = s->code_ptr;
289
- tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
290
- }
291
+ tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
292
293
- h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
294
-#endif
295
+ h->base = TCG_REG_TMP1;
296
+ } else {
297
+ if (a_bits) {
298
+ ldst = new_ldst_label(s);
299
+ ldst->is_ld = is_ld;
300
+ ldst->oi = oi;
301
+ ldst->addrlo_reg = addrlo;
302
+ ldst->addrhi_reg = addrhi;
303
+
304
+ /* We are expecting a_bits to max out at 7, much lower than ANDI. */
305
+ tcg_debug_assert(a_bits < 16);
306
+ tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1));
307
+
308
+ ldst->label_ptr[0] = s->code_ptr;
309
+ tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
310
+ }
311
+
312
+ h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
313
+ }
314
315
if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
316
/* Zero-extend the guest address for use in the host address. */
317
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
318
}
319
tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
320
321
-#ifndef CONFIG_SOFTMMU
322
- if (guest_base) {
323
+ if (!tcg_use_softmmu && guest_base) {
324
tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
325
tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
326
}
327
-#endif
328
329
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
330
tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
331
--
332
2.34.1
333
334
New patch
1
Fixes: 92c041c59b ("tcg/riscv: Add the prologue generation and register the JIT")
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/riscv/tcg-target.c.inc | 6 ++++--
5
1 file changed, 4 insertions(+), 2 deletions(-)
1
6
7
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/riscv/tcg-target.c.inc
10
+++ b/tcg/riscv/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
12
}
13
14
#if !defined(CONFIG_SOFTMMU)
15
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
16
- tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
17
+ if (guest_base) {
18
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
19
+ tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
20
+ }
21
#endif
22
23
/* Call generated code */
24
--
25
2.34.1
New patch
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
---
3
tcg/riscv/tcg-target.c.inc | 185 +++++++++++++++++++------------------
4
1 file changed, 94 insertions(+), 91 deletions(-)
1
5
6
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
7
index XXXXXXX..XXXXXXX 100644
8
--- a/tcg/riscv/tcg-target.c.inc
9
+++ b/tcg/riscv/tcg-target.c.inc
10
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
11
aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
12
a_mask = (1u << aa.align) - 1;
13
14
-#ifdef CONFIG_SOFTMMU
15
- unsigned s_bits = opc & MO_SIZE;
16
- unsigned s_mask = (1u << s_bits) - 1;
17
- int mem_index = get_mmuidx(oi);
18
- int fast_ofs = tlb_mask_table_ofs(s, mem_index);
19
- int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
20
- int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
21
- int compare_mask;
22
- TCGReg addr_adj;
23
+ if (tcg_use_softmmu) {
24
+ unsigned s_bits = opc & MO_SIZE;
25
+ unsigned s_mask = (1u << s_bits) - 1;
26
+ int mem_index = get_mmuidx(oi);
27
+ int fast_ofs = tlb_mask_table_ofs(s, mem_index);
28
+ int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
29
+ int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
30
+ int compare_mask;
31
+ TCGReg addr_adj;
32
33
- ldst = new_ldst_label(s);
34
- ldst->is_ld = is_ld;
35
- ldst->oi = oi;
36
- ldst->addrlo_reg = addr_reg;
37
-
38
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
39
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
40
-
41
- tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
42
- s->page_bits - CPU_TLB_ENTRY_BITS);
43
- tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
44
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
45
-
46
- /*
47
- * For aligned accesses, we check the first byte and include the alignment
48
- * bits within the address. For unaligned access, we check that we don't
49
- * cross pages using the address of the last byte of the access.
50
- */
51
- addr_adj = addr_reg;
52
- if (a_mask < s_mask) {
53
- addr_adj = TCG_REG_TMP0;
54
- tcg_out_opc_imm(s, addr_type == TCG_TYPE_I32 ? OPC_ADDIW : OPC_ADDI,
55
- addr_adj, addr_reg, s_mask - a_mask);
56
- }
57
- compare_mask = s->page_mask | a_mask;
58
- if (compare_mask == sextreg(compare_mask, 0, 12)) {
59
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
60
- } else {
61
- tcg_out_movi(s, addr_type, TCG_REG_TMP1, compare_mask);
62
- tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_adj);
63
- }
64
-
65
- /* Load the tlb comparator and the addend. */
66
- QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
67
- tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
68
- is_ld ? offsetof(CPUTLBEntry, addr_read)
69
- : offsetof(CPUTLBEntry, addr_write));
70
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
71
- offsetof(CPUTLBEntry, addend));
72
-
73
- /* Compare masked address with the TLB entry. */
74
- ldst->label_ptr[0] = s->code_ptr;
75
- tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
76
-
77
- /* TLB Hit - translate address using addend. */
78
- if (addr_type != TCG_TYPE_I32) {
79
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, addr_reg, TCG_REG_TMP2);
80
- } else if (have_zba) {
81
- tcg_out_opc_reg(s, OPC_ADD_UW, TCG_REG_TMP0, addr_reg, TCG_REG_TMP2);
82
- } else {
83
- tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
84
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, TCG_REG_TMP2);
85
- }
86
- *pbase = TCG_REG_TMP0;
87
-#else
88
- TCGReg base;
89
-
90
- if (a_mask) {
91
ldst = new_ldst_label(s);
92
ldst->is_ld = is_ld;
93
ldst->oi = oi;
94
ldst->addrlo_reg = addr_reg;
95
96
- /* We are expecting alignment max 7, so we can always use andi. */
97
- tcg_debug_assert(a_mask == sextreg(a_mask, 0, 12));
98
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
99
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
100
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
101
102
- ldst->label_ptr[0] = s->code_ptr;
103
- tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
104
- }
105
+ tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
106
+ s->page_bits - CPU_TLB_ENTRY_BITS);
107
+ tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
108
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
109
110
- if (guest_base != 0) {
111
- base = TCG_REG_TMP0;
112
- if (addr_type != TCG_TYPE_I32) {
113
- tcg_out_opc_reg(s, OPC_ADD, base, addr_reg, TCG_GUEST_BASE_REG);
114
- } else if (have_zba) {
115
- tcg_out_opc_reg(s, OPC_ADD_UW, base, addr_reg, TCG_GUEST_BASE_REG);
116
- } else {
117
- tcg_out_ext32u(s, base, addr_reg);
118
- tcg_out_opc_reg(s, OPC_ADD, base, base, TCG_GUEST_BASE_REG);
119
+ /*
120
+ * For aligned accesses, we check the first byte and include the
121
+ * alignment bits within the address. For unaligned access, we
122
+ * check that we don't cross pages using the address of the last
123
+ * byte of the access.
124
+ */
125
+ addr_adj = addr_reg;
126
+ if (a_mask < s_mask) {
127
+ addr_adj = TCG_REG_TMP0;
128
+ tcg_out_opc_imm(s, addr_type == TCG_TYPE_I32 ? OPC_ADDIW : OPC_ADDI,
129
+ addr_adj, addr_reg, s_mask - a_mask);
130
}
131
- } else if (addr_type != TCG_TYPE_I32) {
132
- base = addr_reg;
133
+ compare_mask = s->page_mask | a_mask;
134
+ if (compare_mask == sextreg(compare_mask, 0, 12)) {
135
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
136
+ } else {
137
+ tcg_out_movi(s, addr_type, TCG_REG_TMP1, compare_mask);
138
+ tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_adj);
139
+ }
140
+
141
+ /* Load the tlb comparator and the addend. */
142
+ QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
143
+ tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
144
+ is_ld ? offsetof(CPUTLBEntry, addr_read)
145
+ : offsetof(CPUTLBEntry, addr_write));
146
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
147
+ offsetof(CPUTLBEntry, addend));
148
+
149
+ /* Compare masked address with the TLB entry. */
150
+ ldst->label_ptr[0] = s->code_ptr;
151
+ tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
152
+
153
+ /* TLB Hit - translate address using addend. */
154
+ if (addr_type != TCG_TYPE_I32) {
155
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, addr_reg, TCG_REG_TMP2);
156
+ } else if (have_zba) {
157
+ tcg_out_opc_reg(s, OPC_ADD_UW, TCG_REG_TMP0,
158
+ addr_reg, TCG_REG_TMP2);
159
+ } else {
160
+ tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
161
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0,
162
+ TCG_REG_TMP0, TCG_REG_TMP2);
163
+ }
164
+ *pbase = TCG_REG_TMP0;
165
} else {
166
- base = TCG_REG_TMP0;
167
- tcg_out_ext32u(s, base, addr_reg);
168
+ TCGReg base;
169
+
170
+ if (a_mask) {
171
+ ldst = new_ldst_label(s);
172
+ ldst->is_ld = is_ld;
173
+ ldst->oi = oi;
174
+ ldst->addrlo_reg = addr_reg;
175
+
176
+ /* We are expecting alignment max 7, so we can always use andi. */
177
+ tcg_debug_assert(a_mask == sextreg(a_mask, 0, 12));
178
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
179
+
180
+ ldst->label_ptr[0] = s->code_ptr;
181
+ tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
182
+ }
183
+
184
+ if (guest_base != 0) {
185
+ base = TCG_REG_TMP0;
186
+ if (addr_type != TCG_TYPE_I32) {
187
+ tcg_out_opc_reg(s, OPC_ADD, base, addr_reg,
188
+ TCG_GUEST_BASE_REG);
189
+ } else if (have_zba) {
190
+ tcg_out_opc_reg(s, OPC_ADD_UW, base, addr_reg,
191
+ TCG_GUEST_BASE_REG);
192
+ } else {
193
+ tcg_out_ext32u(s, base, addr_reg);
194
+ tcg_out_opc_reg(s, OPC_ADD, base, base, TCG_GUEST_BASE_REG);
195
+ }
196
+ } else if (addr_type != TCG_TYPE_I32) {
197
+ base = addr_reg;
198
+ } else {
199
+ base = TCG_REG_TMP0;
200
+ tcg_out_ext32u(s, base, addr_reg);
201
+ }
202
+ *pbase = base;
203
}
204
- *pbase = base;
205
-#endif
206
207
return ldst;
208
}
209
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
210
TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
211
}
212
213
-#if !defined(CONFIG_SOFTMMU)
214
- if (guest_base) {
215
+ if (!tcg_use_softmmu && guest_base) {
216
tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
217
tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
218
}
219
-#endif
220
221
/* Call generated code */
222
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
223
--
224
2.34.1
New patch
1
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/s390x/tcg-target.c.inc | 161 ++++++++++++++++++-------------------
5
1 file changed, 79 insertions(+), 82 deletions(-)
1
6
7
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/s390x/tcg-target.c.inc
10
+++ b/tcg/s390x/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@
12
/* A scratch register that may be be used throughout the backend. */
13
#define TCG_TMP0 TCG_REG_R1
14
15
-#ifndef CONFIG_SOFTMMU
16
#define TCG_GUEST_BASE_REG TCG_REG_R13
17
-#endif
18
19
/* All of the following instructions are prefixed with their instruction
20
format, and are defined as 8- or 16-bit quantities, even when the two
21
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
22
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
23
a_mask = (1 << h->aa.align) - 1;
24
25
-#ifdef CONFIG_SOFTMMU
26
- unsigned s_mask = (1 << s_bits) - 1;
27
- int mem_index = get_mmuidx(oi);
28
- int fast_off = tlb_mask_table_ofs(s, mem_index);
29
- int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
30
- int table_off = fast_off + offsetof(CPUTLBDescFast, table);
31
- int ofs, a_off;
32
- uint64_t tlb_mask;
33
+ if (tcg_use_softmmu) {
34
+ unsigned s_mask = (1 << s_bits) - 1;
35
+ int mem_index = get_mmuidx(oi);
36
+ int fast_off = tlb_mask_table_ofs(s, mem_index);
37
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
38
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
39
+ int ofs, a_off;
40
+ uint64_t tlb_mask;
41
42
- ldst = new_ldst_label(s);
43
- ldst->is_ld = is_ld;
44
- ldst->oi = oi;
45
- ldst->addrlo_reg = addr_reg;
46
-
47
- tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
48
- s->page_bits - CPU_TLB_ENTRY_BITS);
49
-
50
- tcg_out_insn(s, RXY, NG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, mask_off);
51
- tcg_out_insn(s, RXY, AG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, table_off);
52
-
53
- /*
54
- * For aligned accesses, we check the first byte and include the alignment
55
- * bits within the address. For unaligned access, we check that we don't
56
- * cross pages using the address of the last byte of the access.
57
- */
58
- a_off = (a_mask >= s_mask ? 0 : s_mask - a_mask);
59
- tlb_mask = (uint64_t)s->page_mask | a_mask;
60
- if (a_off == 0) {
61
- tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
62
- } else {
63
- tcg_out_insn(s, RX, LA, TCG_REG_R0, addr_reg, TCG_REG_NONE, a_off);
64
- tgen_andi(s, addr_type, TCG_REG_R0, tlb_mask);
65
- }
66
-
67
- if (is_ld) {
68
- ofs = offsetof(CPUTLBEntry, addr_read);
69
- } else {
70
- ofs = offsetof(CPUTLBEntry, addr_write);
71
- }
72
- if (addr_type == TCG_TYPE_I32) {
73
- ofs += HOST_BIG_ENDIAN * 4;
74
- tcg_out_insn(s, RX, C, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
75
- } else {
76
- tcg_out_insn(s, RXY, CG, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
77
- }
78
-
79
- tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
80
- ldst->label_ptr[0] = s->code_ptr++;
81
-
82
- h->index = TCG_TMP0;
83
- tcg_out_insn(s, RXY, LG, h->index, TCG_TMP0, TCG_REG_NONE,
84
- offsetof(CPUTLBEntry, addend));
85
-
86
- if (addr_type == TCG_TYPE_I32) {
87
- tcg_out_insn(s, RRE, ALGFR, h->index, addr_reg);
88
- h->base = TCG_REG_NONE;
89
- } else {
90
- h->base = addr_reg;
91
- }
92
- h->disp = 0;
93
-#else
94
- if (a_mask) {
95
ldst = new_ldst_label(s);
96
ldst->is_ld = is_ld;
97
ldst->oi = oi;
98
ldst->addrlo_reg = addr_reg;
99
100
- /* We are expecting a_bits to max out at 7, much lower than TMLL. */
101
- tcg_debug_assert(a_mask <= 0xffff);
102
- tcg_out_insn(s, RI, TMLL, addr_reg, a_mask);
103
+ tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
104
+ s->page_bits - CPU_TLB_ENTRY_BITS);
105
106
- tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
107
+ tcg_out_insn(s, RXY, NG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, mask_off);
108
+ tcg_out_insn(s, RXY, AG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, table_off);
109
+
110
+ /*
111
+ * For aligned accesses, we check the first byte and include the
112
+ * alignment bits within the address. For unaligned access, we
113
+ * check that we don't cross pages using the address of the last
114
+ * byte of the access.
115
+ */
116
+ a_off = (a_mask >= s_mask ? 0 : s_mask - a_mask);
117
+ tlb_mask = (uint64_t)s->page_mask | a_mask;
118
+ if (a_off == 0) {
119
+ tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
120
+ } else {
121
+ tcg_out_insn(s, RX, LA, TCG_REG_R0, addr_reg, TCG_REG_NONE, a_off);
122
+ tgen_andi(s, addr_type, TCG_REG_R0, tlb_mask);
123
+ }
124
+
125
+ if (is_ld) {
126
+ ofs = offsetof(CPUTLBEntry, addr_read);
127
+ } else {
128
+ ofs = offsetof(CPUTLBEntry, addr_write);
129
+ }
130
+ if (addr_type == TCG_TYPE_I32) {
131
+ ofs += HOST_BIG_ENDIAN * 4;
132
+ tcg_out_insn(s, RX, C, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
133
+ } else {
134
+ tcg_out_insn(s, RXY, CG, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
135
+ }
136
+
137
+ tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
138
ldst->label_ptr[0] = s->code_ptr++;
139
- }
140
141
- h->base = addr_reg;
142
- if (addr_type == TCG_TYPE_I32) {
143
- tcg_out_ext32u(s, TCG_TMP0, addr_reg);
144
- h->base = TCG_TMP0;
145
- }
146
- if (guest_base < 0x80000) {
147
- h->index = TCG_REG_NONE;
148
- h->disp = guest_base;
149
- } else {
150
- h->index = TCG_GUEST_BASE_REG;
151
+ h->index = TCG_TMP0;
152
+ tcg_out_insn(s, RXY, LG, h->index, TCG_TMP0, TCG_REG_NONE,
153
+ offsetof(CPUTLBEntry, addend));
154
+
155
+ if (addr_type == TCG_TYPE_I32) {
156
+ tcg_out_insn(s, RRE, ALGFR, h->index, addr_reg);
157
+ h->base = TCG_REG_NONE;
158
+ } else {
159
+ h->base = addr_reg;
160
+ }
161
h->disp = 0;
162
+ } else {
163
+ if (a_mask) {
164
+ ldst = new_ldst_label(s);
165
+ ldst->is_ld = is_ld;
166
+ ldst->oi = oi;
167
+ ldst->addrlo_reg = addr_reg;
168
+
169
+ /* We are expecting a_bits to max out at 7, much lower than TMLL. */
170
+ tcg_debug_assert(a_mask <= 0xffff);
171
+ tcg_out_insn(s, RI, TMLL, addr_reg, a_mask);
172
+
173
+ tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
174
+ ldst->label_ptr[0] = s->code_ptr++;
175
+ }
176
+
177
+ h->base = addr_reg;
178
+ if (addr_type == TCG_TYPE_I32) {
179
+ tcg_out_ext32u(s, TCG_TMP0, addr_reg);
180
+ h->base = TCG_TMP0;
181
+ }
182
+ if (guest_base < 0x80000) {
183
+ h->index = TCG_REG_NONE;
184
+ h->disp = guest_base;
185
+ } else {
186
+ h->index = TCG_GUEST_BASE_REG;
187
+ h->disp = 0;
188
+ }
189
}
190
-#endif
191
192
return ldst;
193
}
194
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
195
TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
196
CPU_TEMP_BUF_NLONGS * sizeof(long));
197
198
-#ifndef CONFIG_SOFTMMU
199
- if (guest_base >= 0x80000) {
200
+ if (!tcg_use_softmmu && guest_base >= 0x80000) {
201
tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
202
tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
203
}
204
-#endif
205
206
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
207
208
--
209
2.34.1
210
211
diff view generated by jsdifflib
New patch
1
From: Mike Frysinger <vapier@gentoo.org>
1
2
3
Use of the API was removed a while back, but the define wasn't.
4
5
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Message-Id: <20231015010046.16020-1-vapier@gentoo.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
10
include/tcg/tcg-op.h | 2 --
11
1 file changed, 2 deletions(-)
12
13
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/tcg/tcg-op.h
16
+++ b/include/tcg/tcg-op.h
17
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1,
18
typedef TCGv_i32 TCGv;
19
#define tcg_temp_new() tcg_temp_new_i32()
20
#define tcg_global_mem_new tcg_global_mem_new_i32
21
-#define tcg_temp_free tcg_temp_free_i32
22
#define tcgv_tl_temp tcgv_i32_temp
23
#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i32
24
#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i32
25
@@ -XXX,XX +XXX,XX @@ typedef TCGv_i32 TCGv;
26
typedef TCGv_i64 TCGv;
27
#define tcg_temp_new() tcg_temp_new_i64()
28
#define tcg_global_mem_new tcg_global_mem_new_i64
29
-#define tcg_temp_free tcg_temp_free_i64
30
#define tcgv_tl_temp tcgv_i64_temp
31
#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i64
32
#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i64
33
--
34
2.34.1
35
36
New patch
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tcg-op.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2);
} else if (TCG_TARGET_HAS_div2_i32) {
TCGv_i32 t0 = tcg_temp_ebb_new_i32();
- tcg_gen_movi_i32(t0, 0);
- tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2);
+ TCGv_i32 zero = tcg_constant_i32(0);
+ tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, zero, arg2);
tcg_temp_free_i32(t0);
} else {
gen_helper_divu_i32(ret, arg1, arg2);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
tcg_temp_free_i32(t0);
} else if (TCG_TARGET_HAS_div2_i32) {
TCGv_i32 t0 = tcg_temp_ebb_new_i32();
- tcg_gen_movi_i32(t0, 0);
- tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2);
+ TCGv_i32 zero = tcg_constant_i32(0);
+ tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, zero, arg2);
tcg_temp_free_i32(t0);
} else {
gen_helper_remu_i32(ret, arg1, arg2);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2);
} else if (TCG_TARGET_HAS_div2_i64) {
TCGv_i64 t0 = tcg_temp_ebb_new_i64();
- tcg_gen_movi_i64(t0, 0);
- tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2);
+ TCGv_i64 zero = tcg_constant_i64(0);
+ tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, zero, arg2);
tcg_temp_free_i64(t0);
} else {
gen_helper_divu_i64(ret, arg1, arg2);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
tcg_temp_free_i64(t0);
} else if (TCG_TARGET_HAS_div2_i64) {
TCGv_i64 t0 = tcg_temp_ebb_new_i64();
- tcg_gen_movi_i64(t0, 0);
- tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2);
+ TCGv_i64 zero = tcg_constant_i64(0);
+ tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, zero, arg2);
tcg_temp_free_i64(t0);
} else {
gen_helper_remu_i64(ret, arg1, arg2);
--
2.34.1
This reverts commit cd0372c515c4732d8bd3777cdd995c139c7ed7ea.

The patch is incorrect in that it retains copies between globals and
non-local temps, and non-local temps still die at the end of the BB.

Failing test case for hppa:

    .globl    _start
_start:
    cmpiclr,=    0x24,%r19,%r0
    cmpiclr,<>    0x2f,%r19,%r19

---- 00010057 0001005b
movi_i32 tmp0,$0x24
sub_i32 tmp1,tmp0,r19
mov_i32 tmp2,tmp0
mov_i32 tmp3,r19
movi_i32 tmp1,$0x0

---- 0001005b 0001005f
brcond_i32 tmp2,tmp3,eq,$L1
movi_i32 tmp0,$0x2f
sub_i32 tmp1,tmp0,r19
mov_i32 tmp2,tmp0
mov_i32 tmp3,r19
movi_i32 tmp1,$0x0
mov_i32 r19,tmp1
setcond_i32 psw_n,tmp2,tmp3,ne
set_label $L1

In this case, both copies of "mov_i32 tmp3,r19" are removed. The
second because opt thought it was redundant. The first is removed
later by liveness because tmp3 is known to be dead. This leaves
the setcond_i32 with an uninitialized input.

Revert the entire patch for 5.2; a proper optimization across
the branch may be considered for the next development cycle.

Reported-by: qemu@igor2.repo.hu
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 35 +++++++++++++++++------------------
1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
}
}
}
- /* fall through */
+ goto do_reset_output;

default:
do_default:
- /*
- * Default case: we know nothing about operation (or were unable
- * to compute the operation result) so no propagation is done.
- */
- for (i = 0; i < nb_oargs; i++) {
- reset_temp(op->args[i]);
- /*
- * Save the corresponding known-zero bits mask for the
- * first output argument (only one supported so far).
- */
- if (i == 0) {
- arg_info(op->args[i])->mask = mask;
+ /* Default case: we know nothing about operation (or were unable
+ to compute the operation result) so no propagation is done.
+ We trash everything if the operation is the end of a basic
+ block, otherwise we only trash the output args. "mask" is
+ the non-zero bits mask for the first output arg. */
+ if (def->flags & TCG_OPF_BB_END) {
+ bitmap_zero(temps_used.l, nb_temps);
+ } else {
+ do_reset_output:
+ for (i = 0; i < nb_oargs; i++) {
+ reset_temp(op->args[i]);
+ /* Save the corresponding known-zero bits mask for the
+ first output argument (only one supported so far). */
+ if (i == 0) {
+ arg_info(op->args[i])->mask = mask;
+ }
}
}
break;
-
- case INDEX_op_set_label:
- /* Trash everything at the start of a new extended bb. */
- bitmap_zero(temps_used.l, nb_temps);
- break;
}

/* Eliminate duplicate and redundant fence instructions. */
--
2.25.1

We already register allocate through extended basic blocks;
optimize through extended basic blocks as well.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
int i, nb_oargs;

/*
- * For an opcode that ends a BB, reset all temp data.
- * We do no cross-BB optimization.
+ * We only optimize extended basic blocks. If the opcode ends a BB
+ * and is not a conditional branch, reset all temp data.
*/
if (def->flags & TCG_OPF_BB_END) {
- memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
ctx->prev_mb = NULL;
+ if (!(def->flags & TCG_OPF_COND_BRANCH)) {
+ memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
+ }
return;
}

--
2.34.1
New patch
Do not require the translators to jump through concat and
extract of i64 in order to move values to and from env.
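
As an illustrative sketch (not part of the patch; CPUFooState and its
vreg field are made-up names), a translator can now move a 16-byte env
field in one step instead of a pair of i64 loads plus concat/extract:

    TCGv_i128 t = tcg_temp_new_i128();

    /* Load the whole 128-bit field from env ... */
    tcg_gen_ld_i128(t, tcg_env, offsetof(CPUFooState, vreg));
    /* ... operate on it ... */
    tcg_gen_st_i128(t, tcg_env, offsetof(CPUFooState, vreg));
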
Tested-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/tcg/tcg-op-common.h | 3 +++
tcg/tcg-op.c | 22 ++++++++++++++++++++++
2 files changed, 25 insertions(+)

diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op-common.h
+++ b/include/tcg/tcg-op-common.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src);
void tcg_gen_extr_i128_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i128 arg);
void tcg_gen_concat_i64_i128(TCGv_i128 ret, TCGv_i64 lo, TCGv_i64 hi);

+void tcg_gen_ld_i128(TCGv_i128 ret, TCGv_ptr base, tcg_target_long offset);
+void tcg_gen_st_i128(TCGv_i128 val, TCGv_ptr base, tcg_target_long offset);
+
static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi)
{
tcg_gen_deposit_i64(ret, lo, hi, 32, 32);
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src)
}
}

+void tcg_gen_ld_i128(TCGv_i128 ret, TCGv_ptr base, tcg_target_long offset)
+{
+ if (HOST_BIG_ENDIAN) {
+ tcg_gen_ld_i64(TCGV128_HIGH(ret), base, offset);
+ tcg_gen_ld_i64(TCGV128_LOW(ret), base, offset + 8);
+ } else {
+ tcg_gen_ld_i64(TCGV128_LOW(ret), base, offset);
+ tcg_gen_ld_i64(TCGV128_HIGH(ret), base, offset + 8);
+ }
+}
+
+void tcg_gen_st_i128(TCGv_i128 val, TCGv_ptr base, tcg_target_long offset)
+{
+ if (HOST_BIG_ENDIAN) {
+ tcg_gen_st_i64(TCGV128_HIGH(val), base, offset);
+ tcg_gen_st_i64(TCGV128_LOW(val), base, offset + 8);
+ } else {
+ tcg_gen_st_i64(TCGV128_LOW(val), base, offset);
+ tcg_gen_st_i64(TCGV128_HIGH(val), base, offset + 8);
+ }
+}
+
/* QEMU specific operations. */

void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx)
--
2.34.1
New patch
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/translate.c | 63 +++++++++++++++++--------------------
1 file changed, 29 insertions(+), 34 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static inline void gen_stq_env_A0(DisasContext *s, int offset)

static inline void gen_ldo_env_A0(DisasContext *s, int offset, bool align)
{
+ MemOp atom = (s->cpuid_ext_features & CPUID_EXT_AVX
+ ? MO_ATOM_IFALIGN : MO_ATOM_IFALIGN_PAIR);
+ MemOp mop = MO_128 | MO_LE | atom | (align ? MO_ALIGN_16 : 0);
int mem_index = s->mem_index;
- tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index,
- MO_LEUQ | (align ? MO_ALIGN_16 : 0));
- tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(0)));
- tcg_gen_addi_tl(s->tmp0, s->A0, 8);
- tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
- tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(1)));
+ TCGv_i128 t = tcg_temp_new_i128();
+
+ tcg_gen_qemu_ld_i128(t, s->A0, mem_index, mop);
+ tcg_gen_st_i128(t, tcg_env, offset);
}

static inline void gen_sto_env_A0(DisasContext *s, int offset, bool align)
{
+ MemOp atom = (s->cpuid_ext_features & CPUID_EXT_AVX
+ ? MO_ATOM_IFALIGN : MO_ATOM_IFALIGN_PAIR);
+ MemOp mop = MO_128 | MO_LE | atom | (align ? MO_ALIGN_16 : 0);
int mem_index = s->mem_index;
- tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(0)));
- tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index,
- MO_LEUQ | (align ? MO_ALIGN_16 : 0));
- tcg_gen_addi_tl(s->tmp0, s->A0, 8);
- tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(1)));
- tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
+ TCGv_i128 t = tcg_temp_new_i128();
+
+ tcg_gen_ld_i128(t, tcg_env, offset);
+ tcg_gen_qemu_st_i128(t, s->A0, mem_index, mop);
}

static void gen_ldy_env_A0(DisasContext *s, int offset, bool align)
{
+ MemOp mop = MO_128 | MO_LE | MO_ATOM_IFALIGN_PAIR;
int mem_index = s->mem_index;
- tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index,
- MO_LEUQ | (align ? MO_ALIGN_32 : 0));
- tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(0)));
- tcg_gen_addi_tl(s->tmp0, s->A0, 8);
- tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
- tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(1)));
+ TCGv_i128 t0 = tcg_temp_new_i128();
+ TCGv_i128 t1 = tcg_temp_new_i128();

+ tcg_gen_qemu_ld_i128(t0, s->A0, mem_index, mop | (align ? MO_ALIGN_32 : 0));
tcg_gen_addi_tl(s->tmp0, s->A0, 16);
- tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
- tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(2)));
- tcg_gen_addi_tl(s->tmp0, s->A0, 24);
- tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
- tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(3)));
+ tcg_gen_qemu_ld_i128(t1, s->tmp0, mem_index, mop);
+
+ tcg_gen_st_i128(t0, tcg_env, offset + offsetof(YMMReg, YMM_X(0)));
+ tcg_gen_st_i128(t1, tcg_env, offset + offsetof(YMMReg, YMM_X(1)));
}

static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
{
+ MemOp mop = MO_128 | MO_LE | MO_ATOM_IFALIGN_PAIR;
int mem_index = s->mem_index;
- tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(0)));
- tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index,
- MO_LEUQ | (align ? MO_ALIGN_32 : 0));
- tcg_gen_addi_tl(s->tmp0, s->A0, 8);
- tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(1)));
- tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
+ TCGv_i128 t = tcg_temp_new_i128();
+
+ tcg_gen_ld_i128(t, tcg_env, offset + offsetof(YMMReg, YMM_X(0)));
+ tcg_gen_qemu_st_i128(t, s->A0, mem_index, mop | (align ? MO_ALIGN_32 : 0));
tcg_gen_addi_tl(s->tmp0, s->A0, 16);
- tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(2)));
- tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
- tcg_gen_addi_tl(s->tmp0, s->A0, 24);
- tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(3)));
- tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
+ tcg_gen_ld_i128(t, tcg_env, offset + offsetof(YMMReg, YMM_X(1)));
+ tcg_gen_qemu_st_i128(t, s->tmp0, mem_index, mop);
}

#include "decode-new.h"
--
2.34.1
New patch
From: Paolo Bonzini <pbonzini@redhat.com>

This can be useful to write a shift bit extraction that does not
depend on TARGET_LONG_BITS.
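
For instance (an illustrative sketch, not from this patch; mask, count
and dest are hypothetical TCGv temporaries), a translator can turn a
comparison against an immediate into an all-ones/all-zeros mask at
whatever width target_ulong has:

    /* mask = (count != 0) ? -1 : 0, at target_long width */
    tcg_gen_negsetcondi_tl(TCG_COND_NE, mask, count, 0);
    tcg_gen_and_tl(dest, dest, mask);
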
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20231019104648.389942-15-pbonzini@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/tcg/tcg-op-common.h | 4 ++++
include/tcg/tcg-op.h | 2 ++
tcg/tcg-op.c | 12 ++++++++++++
3 files changed, 18 insertions(+)

diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op-common.h
+++ b/include/tcg/tcg-op-common.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
TCGv_i32 arg1, int32_t arg2);
void tcg_gen_negsetcond_i32(TCGCond cond, TCGv_i32 ret,
TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_negsetcondi_i32(TCGCond cond, TCGv_i32 ret,
+ TCGv_i32 arg1, int32_t arg2);
void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1,
TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2);
void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
@@ -XXX,XX +XXX,XX @@ void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
TCGv_i64 arg1, int64_t arg2);
void tcg_gen_negsetcond_i64(TCGCond cond, TCGv_i64 ret,
TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_negsetcondi_i64(TCGCond cond, TCGv_i64 ret,
+ TCGv_i64 arg1, int64_t arg2);
void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2);
void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -XXX,XX +XXX,XX @@ DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64)
#define tcg_gen_setcond_tl tcg_gen_setcond_i64
#define tcg_gen_setcondi_tl tcg_gen_setcondi_i64
#define tcg_gen_negsetcond_tl tcg_gen_negsetcond_i64
+#define tcg_gen_negsetcondi_tl tcg_gen_negsetcondi_i64
#define tcg_gen_mul_tl tcg_gen_mul_i64
#define tcg_gen_muli_tl tcg_gen_muli_i64
#define tcg_gen_div_tl tcg_gen_div_i64
@@ -XXX,XX +XXX,XX @@ DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64)
#define tcg_gen_setcond_tl tcg_gen_setcond_i32
#define tcg_gen_setcondi_tl tcg_gen_setcondi_i32
#define tcg_gen_negsetcond_tl tcg_gen_negsetcond_i32
+#define tcg_gen_negsetcondi_tl tcg_gen_negsetcondi_i32
#define tcg_gen_mul_tl tcg_gen_mul_i32
#define tcg_gen_muli_tl tcg_gen_muli_i32
#define tcg_gen_div_tl tcg_gen_div_i32
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_negsetcond_i32(TCGCond cond, TCGv_i32 ret,
}
}

+void tcg_gen_negsetcondi_i32(TCGCond cond, TCGv_i32 ret,
+ TCGv_i32 arg1, int32_t arg2)
+{
+ tcg_gen_negsetcond_i32(cond, ret, arg1, tcg_constant_i32(arg2));
+}
+
void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
if (arg2 == 0) {
@@ -XXX,XX +XXX,XX @@ void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
}
}

+void tcg_gen_negsetcondi_i64(TCGCond cond, TCGv_i64 ret,
+ TCGv_i64 arg1, int64_t arg2)
+{
+ tcg_gen_negsetcond_i64(cond, ret, arg1, tcg_constant_i64(arg2));
+}
+
void tcg_gen_negsetcond_i64(TCGCond cond, TCGv_i64 ret,
TCGv_i64 arg1, TCGv_i64 arg2)
{
--
2.34.1
New patch
The two concrete type functions already existed, merely needing
a bit of hardening against invalid inputs.
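
Illustrative sketch of the now-exported interface (dst and src are
hypothetical temporaries): the MemOp argument selects width and
signedness, and full-width sizes degenerate to a plain move:

    tcg_gen_ext_i64(dst, src, MO_SW);   /* sign-extend the low 16 bits */
    tcg_gen_ext_i64(dst, src, MO_UB);   /* zero-extend the low 8 bits */
    tcg_gen_ext_i64(dst, src, MO_UQ);   /* full width: simple move */
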
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/tcg/tcg-op-common.h | 2 ++
include/tcg/tcg-op.h | 2 ++
tcg/tcg-op-ldst.c | 14 ++++++++++----
3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op-common.h
+++ b/include/tcg/tcg-op-common.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg);
void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg);
void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg);
void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc);
void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags);
void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg);
void tcg_gen_hswap_i32(TCGv_i32 ret, TCGv_i32 arg);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg);
void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg);
void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg);
void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc);
void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags);
void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags);
void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg);
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -XXX,XX +XXX,XX @@ DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64)
#define tcg_gen_ext16s_tl tcg_gen_ext16s_i64
#define tcg_gen_ext32u_tl tcg_gen_ext32u_i64
#define tcg_gen_ext32s_tl tcg_gen_ext32s_i64
+#define tcg_gen_ext_tl tcg_gen_ext_i64
#define tcg_gen_bswap16_tl tcg_gen_bswap16_i64
#define tcg_gen_bswap32_tl tcg_gen_bswap32_i64
#define tcg_gen_bswap64_tl tcg_gen_bswap64_i64
@@ -XXX,XX +XXX,XX @@ DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64)
#define tcg_gen_ext16s_tl tcg_gen_ext16s_i32
#define tcg_gen_ext32u_tl tcg_gen_mov_i32
#define tcg_gen_ext32s_tl tcg_gen_mov_i32
+#define tcg_gen_ext_tl tcg_gen_ext_i32
#define tcg_gen_bswap16_tl tcg_gen_bswap16_i32
#define tcg_gen_bswap32_tl(D, S, F) tcg_gen_bswap32_i32(D, S)
#define tcg_gen_bswap_tl tcg_gen_bswap32_i32
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-ldst.c
+++ b/tcg/tcg-op-ldst.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
tcg_gen_qemu_st_i128_int(val, addr, idx, memop);
}

-static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
+void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
{
switch (opc & MO_SSIZE) {
case MO_SB:
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
case MO_UW:
tcg_gen_ext16u_i32(ret, val);
break;
- default:
+ case MO_UL:
+ case MO_SL:
tcg_gen_mov_i32(ret, val);
break;
+ default:
+ g_assert_not_reached();
}
}

-static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
+void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
{
switch (opc & MO_SSIZE) {
case MO_SB:
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
case MO_UL:
tcg_gen_ext32u_i64(ret, val);
break;
- default:
+ case MO_UQ:
+ case MO_SQ:
tcg_gen_mov_i64(ret, val);
break;
+ default:
+ g_assert_not_reached();
}
}

--
2.34.1
New patch
From: Paolo Bonzini <pbonzini@redhat.com>

This will also come in handy later for "less than" comparisons.
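
MO_TL names the MemOp size that matches target_ulong (MO_32 or MO_64),
so target-independent code can test for full-register width without an
#ifdef on TARGET_LONG_BITS. An illustrative use (memop is a
hypothetical MemOp value):

    /* True for MO_32 on 32-bit guests and MO_64 on 64-bit guests. */
    if ((memop & MO_SIZE) == MO_TL) {
        /* operand already spans the whole target register */
    }
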
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <03ba02fd-fade-4409-be16-2f81a5690b4c@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/target_long.h | 2 ++
1 file changed, 2 insertions(+)

diff --git a/include/exec/target_long.h b/include/exec/target_long.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/target_long.h
+++ b/include/exec/target_long.h
@@ -XXX,XX +XXX,XX @@ typedef uint32_t target_ulong;
#define TARGET_FMT_lx "%08x"
#define TARGET_FMT_ld "%d"
#define TARGET_FMT_lu "%u"
+#define MO_TL MO_32
#elif TARGET_LONG_SIZE == 8
typedef int64_t target_long;
typedef uint64_t target_ulong;
#define TARGET_FMT_lx "%016" PRIx64
#define TARGET_FMT_ld "%" PRId64
#define TARGET_FMT_lu "%" PRIu64
+#define MO_TL MO_64
#else
#error TARGET_LONG_SIZE undefined
#endif
--
2.34.1
New patch
The ext_and_shift_reg helper does this plus a shift.
The non-zero check for the shift count duplicates
the one done within tcg_gen_shli_i64.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/translate-a64.c | 37 ++--------------------------------
1 file changed, 2 insertions(+), 35 deletions(-)

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
int extsize = extract32(option, 0, 2);
bool is_signed = extract32(option, 2, 1);

- if (is_signed) {
- switch (extsize) {
- case 0:
- tcg_gen_ext8s_i64(tcg_out, tcg_in);
- break;
- case 1:
- tcg_gen_ext16s_i64(tcg_out, tcg_in);
- break;
- case 2:
- tcg_gen_ext32s_i64(tcg_out, tcg_in);
- break;
- case 3:
- tcg_gen_mov_i64(tcg_out, tcg_in);
- break;
- }
- } else {
- switch (extsize) {
- case 0:
- tcg_gen_ext8u_i64(tcg_out, tcg_in);
- break;
- case 1:
- tcg_gen_ext16u_i64(tcg_out, tcg_in);
- break;
- case 2:
- tcg_gen_ext32u_i64(tcg_out, tcg_in);
- break;
- case 3:
- tcg_gen_mov_i64(tcg_out, tcg_in);
- break;
- }
- }
-
- if (shift) {
- tcg_gen_shli_i64(tcg_out, tcg_out, shift);
- }
+ tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
+ tcg_gen_shli_i64(tcg_out, tcg_out, shift);
}

static inline void gen_check_sp_alignment(DisasContext *s)
--
2.34.1
New patch
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/translate.c | 28 +++-------------------------
1 file changed, 3 insertions(+), 25 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)

static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
{
- switch (size) {
- case MO_8:
- if (sign) {
- tcg_gen_ext8s_tl(dst, src);
- } else {
- tcg_gen_ext8u_tl(dst, src);
- }
- return dst;
- case MO_16:
- if (sign) {
- tcg_gen_ext16s_tl(dst, src);
- } else {
- tcg_gen_ext16u_tl(dst, src);
- }
- return dst;
-#ifdef TARGET_X86_64
- case MO_32:
- if (sign) {
- tcg_gen_ext32s_tl(dst, src);
- } else {
- tcg_gen_ext32u_tl(dst, src);
- }
- return dst;
-#endif
- default:
+ if (size == MO_TL) {
return src;
}
+ tcg_gen_ext_tl(dst, src, size | (sign ? MO_SIGN : 0));
+ return dst;
}

static void gen_extu(MemOp ot, TCGv reg)
--
2.34.1
New patch
We still need to check OS_{BYTE,WORD,LONG},
because m68k includes floating point in OS_*.
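
The rewrite below relies on the integer OS_* values lining up with the
MemOp sizes; a build-time check along these lines (illustrative, not
part of the patch) would document that assumption:

    QEMU_BUILD_BUG_ON(OS_BYTE != MO_8);
    QEMU_BUILD_BUG_ON(OS_WORD != MO_16);
    QEMU_BUILD_BUG_ON(OS_LONG != MO_32);
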
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/m68k/translate.c | 23 +++--------------------
1 file changed, 3 insertions(+), 20 deletions(-)

diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -XXX,XX +XXX,XX @@ static inline void gen_ext(TCGv res, TCGv val, int opsize, int sign)
{
switch (opsize) {
case OS_BYTE:
- if (sign) {
- tcg_gen_ext8s_i32(res, val);
- } else {
- tcg_gen_ext8u_i32(res, val);
- }
- break;
case OS_WORD:
- if (sign) {
- tcg_gen_ext16s_i32(res, val);
- } else {
- tcg_gen_ext16u_i32(res, val);
- }
- break;
case OS_LONG:
- tcg_gen_mov_i32(res, val);
+ tcg_gen_ext_i32(res, val, opsize | (sign ? MO_SIGN : 0));
break;
default:
g_assert_not_reached();
@@ -XXX,XX +XXX,XX @@ static int gen_ea_mode_fp(CPUM68KState *env, DisasContext *s, int mode,
tmp = tcg_temp_new();
switch (opsize) {
case OS_BYTE:
- tcg_gen_ext8s_i32(tmp, reg);
- gen_helper_exts32(tcg_env, fp, tmp);
- break;
case OS_WORD:
- tcg_gen_ext16s_i32(tmp, reg);
- gen_helper_exts32(tcg_env, fp, tmp);
- break;
case OS_LONG:
- gen_helper_exts32(tcg_env, fp, reg);
+ tcg_gen_ext_i32(tmp, reg, opsize | MO_SIGN);
+ gen_helper_exts32(tcg_env, fp, tmp);
break;
case OS_SINGLE:
gen_helper_extf32(tcg_env, fp, reg);
--
2.34.1
New patch
Reviewed-by: Yoshinori Sato <ysato@users.sourceforge.jp>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/rx/translate.c | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/target/rx/translate.c b/target/rx/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/rx/translate.c
+++ b/target/rx/translate.c
@@ -XXX,XX +XXX,XX @@ static bool trans_MOV_ra(DisasContext *ctx, arg_MOV_ra *a)
/* mov.<bwl> rs,rd */
static bool trans_MOV_mm(DisasContext *ctx, arg_MOV_mm *a)
{
- static void (* const mov[])(TCGv ret, TCGv arg) = {
- tcg_gen_ext8s_i32, tcg_gen_ext16s_i32, tcg_gen_mov_i32,
- };
TCGv tmp, mem, addr;
+
if (a->lds == 3 && a->ldd == 3) {
/* mov.<bwl> rs,rd */
- mov[a->sz](cpu_regs[a->rd], cpu_regs[a->rs]);
+ tcg_gen_ext_i32(cpu_regs[a->rd], cpu_regs[a->rs], a->sz | MO_SIGN);
return true;
}

@@ -XXX,XX +XXX,XX @@ static bool trans_MOVU_mr(DisasContext *ctx, arg_MOVU_mr *a)
/* movu.<bw> rs,rd */
static bool trans_MOVU_rr(DisasContext *ctx, arg_MOVU_rr *a)
{
- static void (* const ext[])(TCGv ret, TCGv arg) = {
- tcg_gen_ext8u_i32, tcg_gen_ext16u_i32,
- };
- ext[a->sz](cpu_regs[a->rd], cpu_regs[a->rs]);
+ tcg_gen_ext_i32(cpu_regs[a->rd], cpu_regs[a->rs], a->sz);
return true;
}

--
2.34.1
43
diff view generated by jsdifflib
New patch
1
The EXTR instructions can use the extract opcodes.
1
2
3
Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
target/tricore/translate.c | 20 ++++----------------
7
1 file changed, 4 insertions(+), 16 deletions(-)
8
9
diff --git a/target/tricore/translate.c b/target/tricore/translate.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/target/tricore/translate.c
12
+++ b/target/tricore/translate.c
13
@@ -XXX,XX +XXX,XX @@ static void decode_rrpw_extract_insert(DisasContext *ctx)
14
switch (op2) {
15
case OPC2_32_RRPW_EXTR:
16
if (width == 0) {
17
- tcg_gen_movi_tl(cpu_gpr_d[r3], 0);
18
- break;
19
- }
20
-
21
- if (pos + width <= 32) {
22
- /* optimize special cases */
23
- if ((pos == 0) && (width == 8)) {
24
- tcg_gen_ext8s_tl(cpu_gpr_d[r3], cpu_gpr_d[r1]);
25
- } else if ((pos == 0) && (width == 16)) {
26
- tcg_gen_ext16s_tl(cpu_gpr_d[r3], cpu_gpr_d[r1]);
27
- } else {
28
- tcg_gen_shli_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], 32 - pos - width);
29
- tcg_gen_sari_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], 32 - width);
30
- }
31
+ tcg_gen_movi_tl(cpu_gpr_d[r3], 0);
32
+ } else if (pos + width <= 32) {
33
+ tcg_gen_sextract_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], pos, width);
34
}
35
break;
36
case OPC2_32_RRPW_EXTR_U:
37
if (width == 0) {
38
tcg_gen_movi_tl(cpu_gpr_d[r3], 0);
39
} else {
40
- tcg_gen_shri_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], pos);
41
- tcg_gen_andi_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], ~0u >> (32-width));
42
+ tcg_gen_extract_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], pos, width);
43
}
44
break;
45
case OPC2_32_RRPW_IMASK:
46
--
47
2.34.1
New patch
Reviewed-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/xtensa/translate.c | 12 +-----------
1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/xtensa/translate.c
+++ b/target/xtensa/translate.c
@@ -XXX,XX +XXX,XX @@ static void translate_salt(DisasContext *dc, const OpcodeArg arg[],
static void translate_sext(DisasContext *dc, const OpcodeArg arg[],
const uint32_t par[])
{
- int shift = 31 - arg[2].imm;
-
- if (shift == 24) {
- tcg_gen_ext8s_i32(arg[0].out, arg[1].in);
- } else if (shift == 16) {
- tcg_gen_ext16s_i32(arg[0].out, arg[1].in);
- } else {
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_shli_i32(tmp, arg[1].in, shift);
- tcg_gen_sari_i32(arg[0].out, tmp, shift);
- }
+ tcg_gen_sextract_i32(arg[0].out, arg[1].in, 0, arg[2].imm + 1);
}

static uint32_t test_exceptions_simcall(DisasContext *dc,
--
2.34.1