v3: Rebase and add a few more patches.

r~

The following changes since commit 384dbdda94c0bba55bf186cccd3714bbb9b737e9:

  Merge tag 'migration-20231020-pull-request' of https://gitlab.com/juan.quintela/qemu into staging (2023-10-20 06:46:53 -0700)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20231023

for you to fetch changes up to e40df3522b384d3b7dd38187d735bd6228b20e47:

  target/xtensa: Use tcg_gen_sextract_i32 (2023-10-22 16:44:49 -0700)

----------------------------------------------------------------
tcg: Drop unused tcg_temp_free define
tcg: Introduce tcg_use_softmmu
tcg: Optimize past conditional branches
tcg: Use constant zero when expanding with divu2
tcg: Add negsetcondi
tcg: Define MO_TL
tcg: Export tcg_gen_ext_{i32,i64,tl}
target/*: Use tcg_gen_ext_*
tcg/ppc: Enable direct branching tcg_out_goto_tb with TCG_REG_TB
tcg/ppc: Use ADDPCIS for power9
tcg/ppc: Use prefixed instructions for power10
tcg/ppc: Disable TCG_REG_TB for Power9/Power10

----------------------------------------------------------------
Jordan Niethe (1):
      tcg/ppc: Enable direct branching tcg_out_goto_tb with TCG_REG_TB

Mike Frysinger (1):
      tcg: drop unused tcg_temp_free define

Paolo Bonzini (2):
      tcg: add negsetcondi
      tcg: Define MO_TL

Richard Henderson (34):
      tcg/ppc: Untabify tcg-target.c.inc
      tcg/ppc: Reinterpret tb-relative to TB+4
      tcg/ppc: Use ADDPCIS in tcg_out_tb_start
      tcg/ppc: Use ADDPCIS in tcg_out_movi_int
      tcg/ppc: Use ADDPCIS for the constant pool
      tcg/ppc: Use ADDPCIS in tcg_out_goto_tb
      tcg/ppc: Use PADDI in tcg_out_movi
      tcg/ppc: Use prefixed instructions in tcg_out_mem_long
      tcg/ppc: Use PLD in tcg_out_movi for constant pool
      tcg/ppc: Use prefixed instructions in tcg_out_dupi_vec
      tcg/ppc: Use PLD in tcg_out_goto_tb
      tcg/ppc: Disable TCG_REG_TB for Power9/Power10
      tcg: Introduce tcg_use_softmmu
      tcg: Provide guest_base fallback for system mode
      tcg/arm: Use tcg_use_softmmu
      tcg/aarch64: Use tcg_use_softmmu
      tcg/i386: Use tcg_use_softmmu
      tcg/loongarch64: Use tcg_use_softmmu
      tcg/mips: Use tcg_use_softmmu
      tcg/ppc: Use tcg_use_softmmu
      tcg/riscv: Do not reserve TCG_GUEST_BASE_REG for guest_base zero
      tcg/riscv: Use tcg_use_softmmu
      tcg/s390x: Use tcg_use_softmmu
      tcg: Use constant zero when expanding with divu2
      tcg: Optimize past conditional branches
      tcg: Add tcg_gen_{ld,st}_i128
      target/i386: Use i128 for 128 and 256-bit loads and stores
      tcg: Export tcg_gen_ext_{i32,i64,tl}
      target/arm: Use tcg_gen_ext_i64
      target/i386: Use tcg_gen_ext_tl
      target/m68k: Use tcg_gen_ext_i32
      target/rx: Use tcg_gen_ext_i32
      target/tricore: Use tcg_gen_*extract_tl
      target/xtensa: Use tcg_gen_sextract_i32

 include/exec/target_long.h | 2 +
 include/tcg/tcg-op-common.h | 9 +
 include/tcg/tcg-op.h | 6 +-
 include/tcg/tcg.h | 8 +-
 target/arm/tcg/translate-a64.c | 37 +--
 target/i386/tcg/translate.c | 91 +++----
 target/m68k/translate.c | 23 +-
 target/rx/translate.c | 11 +-
 target/tricore/translate.c | 20 +-
 target/xtensa/translate.c | 12 +-
 tcg/optimize.c | 8 +-
 tcg/tcg-op-ldst.c | 28 +-
 tcg/tcg-op.c | 50 +++-
 tcg/tcg.c | 13 +-
 tcg/aarch64/tcg-target.c.inc | 177 ++++++------
 tcg/arm/tcg-target.c.inc | 203 +++++++-------
 tcg/i386/tcg-target.c.inc | 198 +++++++-------
 tcg/loongarch64/tcg-target.c.inc | 126 +++++----
 tcg/mips/tcg-target.c.inc | 231 ++++++++--------
 tcg/ppc/tcg-target.c.inc | 561 ++++++++++++++++++++++++++-------------
 tcg/riscv/tcg-target.c.inc | 189 ++++++-------
 tcg/s390x/tcg-target.c.inc | 161 ++++++-----
 22 files changed, 1152 insertions(+), 1012 deletions(-)
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
7
index XXXXXXX..XXXXXXX 100644
8
--- a/tcg/ppc/tcg-target.c.inc
9
+++ b/tcg/ppc/tcg-target.c.inc
10
@@ -XXX,XX +XXX,XX @@ static inline bool in_range_b(tcg_target_long target)
11
}
12
13
static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
14
-             const tcg_insn_unit *target)
15
+ const tcg_insn_unit *target)
16
{
17
ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
18
tcg_debug_assert(in_range_b(disp));
19
@@ -XXX,XX +XXX,XX @@ static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
20
}
21
22
static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
23
-             const tcg_insn_unit *target)
24
+ const tcg_insn_unit *target)
25
{
26
ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
27
tcg_debug_assert(disp == (int16_t) disp);
28
@@ -XXX,XX +XXX,XX @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
29
tcgv_vec_arg(t1), tcgv_vec_arg(t2));
30
vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
31
tcgv_vec_arg(v0), tcgv_vec_arg(t1));
32
-    break;
33
+ break;
34
35
case MO_32:
36
tcg_debug_assert(!have_isa_2_07);
37
--
38
2.34.1
From: Jordan Niethe <jniethe5@gmail.com>

Direct branch patching was disabled when using TCG_REG_TB in commit
736a1588c1 ("tcg/ppc: Fix race in goto_tb implementation").

The issue with direct branch patching with TCG_REG_TB is the lack of
synchronization between the new TCG_REG_TB being established and the
direct branch being patched in.

If each translation block is responsible for establishing its own
TCG_REG_TB then there can be no synchronization issue.

Make each translation block begin by setting up its own TCG_REG_TB.
Use the preferred 'bcl 20,31,$+4' sequence.
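For reference, 'bcl 20,31,$+4' is an always-taken branch-and-link to the
very next instruction, used only for its side effect of loading that
address into LR; a small standalone sketch of its B-form encoding, not
part of the patch:

    #include <assert.h>
    #include <stdint.h>

    /* Encode a PowerPC B-form conditional branch: opcode 16, BO, BI,
     * byte displacement (low two bits implied zero), AA=0, LK. */
    static uint32_t encode_bc(unsigned bo, unsigned bi, int disp, unsigned lk)
    {
        return (16u << 26) | (bo << 21) | (bi << 16) | ((unsigned)disp & 0xfffc) | lk;
    }

    int main(void)
    {
        /* BO=20 means "branch always" (BI is not tested), LK=1 writes LR,
         * and the +4 target is simply the following instruction. */
        assert(encode_bc(20, 31, 4, 1) == 0x429f0005);
        return 0;
    }
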
Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
[rth: Split out tcg_out_tb_start, power9 addpcis]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 48 ++++++++++++++--------------------------
1 file changed, 17 insertions(+), 31 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
24
index XXXXXXX..XXXXXXX 100644
25
--- a/tcg/ppc/tcg-target.c.inc
26
+++ b/tcg/ppc/tcg-target.c.inc
27
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
28
29
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
30
tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
31
- if (USE_REG_TB) {
32
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
33
- }
34
tcg_out32(s, BCCTR | BO_ALWAYS);
35
36
/* Epilogue */
37
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
38
39
static void tcg_out_tb_start(TCGContext *s)
40
{
41
- /* nothing to do */
42
+ /* Load TCG_REG_TB. */
43
+ if (USE_REG_TB) {
44
+ /* bcl 20,31,$+4 (preferred form for getting nia) */
45
+ tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
46
+ tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
47
+ tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, -4));
48
+ }
49
}
50
51
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
52
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_tb(TCGContext *s, int which)
53
{
54
uintptr_t ptr = get_jmp_target_addr(s, which);
55
56
+ /* Direct branch will be patched by tb_target_set_jmp_target. */
57
+ set_jmp_insn_offset(s, which);
58
+ tcg_out32(s, NOP);
59
+
60
+ /* When branch is out of range, fall through to indirect. */
61
if (USE_REG_TB) {
62
ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
63
- tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset);
64
-
65
- /* TODO: Use direct branches when possible. */
66
- set_jmp_insn_offset(s, which);
67
- tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
68
-
69
- tcg_out32(s, BCCTR | BO_ALWAYS);
70
-
71
- /* For the unlinked case, need to reset TCG_REG_TB. */
72
- set_jmp_reset_offset(s, which);
73
- tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
74
- -tcg_current_code_size(s));
75
+ tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
76
} else {
77
- /* Direct branch will be patched by tb_target_set_jmp_target. */
78
- set_jmp_insn_offset(s, which);
79
- tcg_out32(s, NOP);
80
-
81
- /* When branch is out of range, fall through to indirect. */
82
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr);
83
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr);
84
- tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
85
- tcg_out32(s, BCCTR | BO_ALWAYS);
86
- set_jmp_reset_offset(s, which);
87
}
88
+
89
+ tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
90
+ tcg_out32(s, BCCTR | BO_ALWAYS);
91
+ set_jmp_reset_offset(s, which);
92
}
93
94
void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
95
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
96
intptr_t diff = addr - jmp_rx;
97
tcg_insn_unit insn;
98
99
- if (USE_REG_TB) {
100
- return;
101
- }
102
-
103
if (in_range_b(diff)) {
104
insn = B | (diff & 0x3fffffc);
105
} else {
106
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
107
switch (opc) {
108
case INDEX_op_goto_ptr:
109
tcg_out32(s, MTSPR | RS(args[0]) | CTR);
110
- if (USE_REG_TB) {
111
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
112
- }
113
tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
114
tcg_out32(s, BCCTR | BO_ALWAYS);
115
break;
116
--
117
2.34.1
It saves one insn to load the address of TB+4 instead of TB.
Adjust all of the indexing to match.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/ppc/tcg-target.c.inc
12
+++ b/tcg/ppc/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_callee_save_regs[] = {
14
TCG_REG_R31
15
};
16
17
+/* For PPC, we use TB+4 instead of TB as the base. */
18
+static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target)
19
+{
20
+ return tcg_tbrel_diff(s, target) - 4;
21
+}
22
+
23
static inline bool in_range_b(tcg_target_long target)
24
{
25
return target == sextract64(target, 0, 26);
26
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
27
}
28
29
/* Load addresses within the TB with one insn. */
30
- tb_diff = tcg_tbrel_diff(s, (void *)arg);
31
+ tb_diff = ppc_tbrel_diff(s, (void *)arg);
32
if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
33
tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
34
return;
35
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
36
/* Use the constant pool, if possible. */
37
if (!in_prologue && USE_REG_TB) {
38
new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
39
- tcg_tbrel_diff(s, NULL));
40
+ ppc_tbrel_diff(s, NULL));
41
tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
42
return;
43
}
44
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
45
*/
46
if (USE_REG_TB) {
47
rel = R_PPC_ADDR16;
48
- add = tcg_tbrel_diff(s, NULL);
49
+ add = ppc_tbrel_diff(s, NULL);
50
} else {
51
rel = R_PPC_ADDR32;
52
add = 0;
53
@@ -XXX,XX +XXX,XX @@ static void tcg_out_tb_start(TCGContext *s)
54
/* bcl 20,31,$+4 (preferred form for getting nia) */
55
tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
56
tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
57
- tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, -4));
58
}
59
}
60
61
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_tb(TCGContext *s, int which)
62
63
/* When branch is out of range, fall through to indirect. */
64
if (USE_REG_TB) {
65
- ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
66
+ ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr);
67
tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
68
} else {
69
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr);
70
--
71
2.34.1
With ISA v3.0, we can use ADDPCIS instead of BCL+MFLR to load NIA.
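ADDPCIS adds a 64K-multiple displacement to the next instruction address;
the upper 16 bits of that displacement are scattered over the d0/d1/d2
fields of the DX form, as tcg_out_addpcis below does with extract32().
A self-contained sketch of that split (field packing only, opcode bits
omitted):

    #include <assert.h>
    #include <stdint.h>

    /* Local stand-in for QEMU's extract32(). */
    static uint32_t extract32(uint32_t value, int start, int length)
    {
        return (value >> start) & (~0u >> (32 - length));
    }

    static void check_addpcis_split(int32_t imm)
    {
        uint32_t u = (uint32_t)imm;
        uint32_t d2 = extract32(u, 16, 1);
        uint32_t d1 = extract32(u, 17, 5);
        uint32_t d0 = extract32(u, 22, 10);

        assert((u & 0xffff) == 0);                        /* multiple of 64K */
        assert(((d0 << 6) | (d1 << 1) | d2) == u >> 16);  /* fields rebuild D */
    }

    int main(void)
    {
        check_addpcis_split(0x10000);
        check_addpcis_split(-0x20000);
        check_addpcis_split(0x7fff0000);
        return 0;
    }
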
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 25 ++++++++++++++++++++++---
1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/ppc/tcg-target.c.inc
11
+++ b/tcg/ppc/tcg-target.c.inc
12
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
13
#define CRNAND XO19(225)
14
#define CROR XO19(449)
15
#define CRNOR XO19( 33)
16
+#define ADDPCIS XO19( 2)
17
18
#define EXTSB XO31(954)
19
#define EXTSH XO31(922)
20
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
21
tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
22
}
23
24
+static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm)
25
+{
26
+ uint32_t d0, d1, d2;
27
+
28
+ tcg_debug_assert((imm & 0xffff) == 0);
29
+ tcg_debug_assert(imm == (int32_t)imm);
30
+
31
+ d2 = extract32(imm, 16, 1);
32
+ d1 = extract32(imm, 17, 5);
33
+ d0 = extract32(imm, 22, 10);
34
+ tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2);
35
+}
36
+
37
static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
38
{
39
TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
40
@@ -XXX,XX +XXX,XX @@ static void tcg_out_tb_start(TCGContext *s)
41
{
42
/* Load TCG_REG_TB. */
43
if (USE_REG_TB) {
44
- /* bcl 20,31,$+4 (preferred form for getting nia) */
45
- tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
46
- tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
47
+ if (have_isa_3_00) {
48
+ /* lnia REG_TB */
49
+ tcg_out_addpcis(s, TCG_REG_TB, 0);
50
+ } else {
51
+ /* bcl 20,31,$+4 (preferred form for getting nia) */
52
+ tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
53
+ tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
54
+ }
55
}
56
}
57
58
--
59
2.34.1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 13 +++++++++++++
1 file changed, 13 insertions(+)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
7
index XXXXXXX..XXXXXXX 100644
8
--- a/tcg/ppc/tcg-target.c.inc
9
+++ b/tcg/ppc/tcg-target.c.inc
10
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
11
return;
12
}
13
14
+ /* Load addresses within 2GB with 2 insns. */
15
+ if (have_isa_3_00) {
16
+ intptr_t hi = tcg_pcrel_diff(s, (void *)arg) - 4;
17
+ int16_t lo = hi;
18
+
19
+ hi -= lo;
20
+ if (hi == (int32_t)hi) {
21
+ tcg_out_addpcis(s, TCG_REG_TMP2, hi);
22
+ tcg_out32(s, ADDI | TAI(ret, TCG_REG_TMP2, lo));
23
+ return;
24
+ }
25
+ }
26
+
27
/* Load addresses within 2GB of TB with 2 (or rarely 3) insns. */
28
if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
29
tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
30
--
31
2.34.1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 12 ++++++++++++
1 file changed, 12 insertions(+)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
7
index XXXXXXX..XXXXXXX 100644
8
--- a/tcg/ppc/tcg-target.c.inc
9
+++ b/tcg/ppc/tcg-target.c.inc
10
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
11
tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
12
return;
13
}
14
+ if (have_isa_3_00) {
15
+ tcg_out_addpcis(s, TCG_REG_TMP2, 0);
16
+ new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0);
17
+ tcg_out32(s, LD | TAI(ret, TCG_REG_TMP2, 0));
18
+ return;
19
+ }
20
21
tmp = arg >> 31 >> 1;
22
tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
23
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
24
if (USE_REG_TB) {
25
rel = R_PPC_ADDR16;
26
add = ppc_tbrel_diff(s, NULL);
27
+ } else if (have_isa_3_00) {
28
+ tcg_out_addpcis(s, TCG_REG_TMP1, 0);
29
+ rel = R_PPC_REL14;
30
+ add = 0;
31
} else {
32
rel = R_PPC_ADDR32;
33
add = 0;
34
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
35
if (USE_REG_TB) {
36
tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
37
load_insn |= RA(TCG_REG_TB);
38
+ } else if (have_isa_3_00) {
39
+ tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
40
} else {
41
tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
42
tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
43
--
44
2.34.1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
7
index XXXXXXX..XXXXXXX 100644
8
--- a/tcg/ppc/tcg-target.c.inc
9
+++ b/tcg/ppc/tcg-target.c.inc
10
@@ -XXX,XX +XXX,XX @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
11
static void tcg_out_goto_tb(TCGContext *s, int which)
12
{
13
uintptr_t ptr = get_jmp_target_addr(s, which);
14
+ int16_t lo;
15
16
/* Direct branch will be patched by tb_target_set_jmp_target. */
17
set_jmp_insn_offset(s, which);
18
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_tb(TCGContext *s, int which)
19
if (USE_REG_TB) {
20
ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr);
21
tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
22
+ } else if (have_isa_3_00) {
23
+ ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4;
24
+ lo = offset;
25
+ tcg_out_addpcis(s, TCG_REG_TMP1, offset - lo);
26
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
27
} else {
28
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr);
29
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr);
30
+ lo = ptr;
31
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - lo);
32
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
33
}
34
35
tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
36
--
37
2.34.1
PADDI can load 34-bit immediates and 34-bit pc-relative addresses.
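Prefixed instructions are eight bytes long and must not cross a 64-byte
boundary, which is what tcg_out_need_prefix_align() below checks before
emitting a NOP. A minimal standalone illustration of that test:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* A prefix starting at offset 0x3c within a 64-byte block is the only
     * 4-byte-aligned position where the 8-byte pair would straddle a boundary. */
    static bool need_prefix_align(uintptr_t code_ptr)
    {
        return (code_ptr & 0x3f) == 0x3c;
    }

    int main(void)
    {
        for (uintptr_t p = 0; p < 64; p += 4) {
            bool crosses = p + 8 > 64;
            assert(need_prefix_align(p) == crosses);
        }
        return 0;
    }
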
Reviewed-by: Jordan Niethe <jniethe5@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 51 ++++++++++++++++++++++++++++++++
1 file changed, 51 insertions(+)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/ppc/tcg-target.c.inc
12
+++ b/tcg/ppc/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
14
return true;
15
}
16
17
+/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
18
+static bool tcg_out_need_prefix_align(TCGContext *s)
19
+{
20
+ return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
21
+}
22
+
23
+static void tcg_out_prefix_align(TCGContext *s)
24
+{
25
+ if (tcg_out_need_prefix_align(s)) {
26
+ tcg_out32(s, NOP);
27
+ }
28
+}
29
+
30
+static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
31
+{
32
+ return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
33
+}
34
+
35
+/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
36
+static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
37
+ unsigned ra, tcg_target_long imm, bool r)
38
+{
39
+ tcg_insn_unit p, i;
40
+
41
+ p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
42
+ i = opc | TAI(rt, ra, imm);
43
+
44
+ tcg_out_prefix_align(s);
45
+ tcg_out32(s, p);
46
+ tcg_out32(s, i);
47
+}
48
+
49
static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
50
TCGReg base, tcg_target_long offset);
51
52
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
53
return;
54
}
55
56
+ /*
57
+ * Load values up to 34 bits, and pc-relative addresses,
58
+ * with one prefixed insn.
59
+ */
60
+ if (have_isa_3_10) {
61
+ if (arg == sextract64(arg, 0, 34)) {
62
+ /* pli ret,value = paddi ret,0,value,0 */
63
+ tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
64
+ return;
65
+ }
66
+
67
+ tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
68
+ if (tmp == sextract64(tmp, 0, 34)) {
69
+ /* pla ret,value = paddi ret,0,value,1 */
70
+ tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
71
+ return;
72
+ }
73
+ }
74
+
75
/* Load 32-bit immediates with two insns. Note that we've already
76
eliminated bare ADDIS, so we know both insns are required. */
77
if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
78
--
79
2.34.1
When the offset is out of range of the non-prefixed insn, but
fits the 34-bit immediate of the prefixed insn, use that.
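Whether an offset fits the prefixed form is the test
'offset == sextract64(offset, 0, 34)' used below; a tiny self-contained
version of that 34-bit range check:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Sign-extend the low 34 bits, like QEMU's sextract64(value, 0, 34). */
    static int64_t sextract34(uint64_t value)
    {
        uint64_t v = value & ((UINT64_C(1) << 34) - 1);
        return (int64_t)v - ((v >> 33) ? (INT64_C(1) << 34) : 0);
    }

    static bool fits_34bit(int64_t offset)
    {
        return offset == sextract34((uint64_t)offset);
    }

    int main(void)
    {
        assert(fits_34bit(INT64_C(0x1ffffffff)));    /*  2^33 - 1 */
        assert(fits_34bit(INT64_C(-0x200000000)));   /* -2^33     */
        assert(!fits_34bit(INT64_C(0x200000000)));   /*  2^33     */
        return 0;
    }
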
Reviewed-by: Jordan Niethe <jniethe5@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 66 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 66 insertions(+)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/ppc/tcg-target.c.inc
13
+++ b/tcg/ppc/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
15
#define STDX XO31(149)
16
#define STQ XO62( 2)
17
18
+#define PLWA OPCD( 41)
19
+#define PLD OPCD( 57)
20
+#define PLXSD OPCD( 42)
21
+#define PLXV OPCD(25 * 2 + 1) /* force tx=1 */
22
+
23
+#define PSTD OPCD( 61)
24
+#define PSTXSD OPCD( 46)
25
+#define PSTXV OPCD(27 * 2 + 1) /* force sx=1 */
26
+
27
#define ADDIC OPCD( 12)
28
#define ADDI OPCD( 14)
29
#define ADDIS OPCD( 15)
30
@@ -XXX,XX +XXX,XX @@ static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
31
return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
32
}
33
34
+/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */
35
+static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
36
+ unsigned ra, tcg_target_long imm, bool r)
37
+{
38
+ tcg_insn_unit p, i;
39
+
40
+ p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff);
41
+ i = opc | TAI(rt, ra, imm);
42
+
43
+ tcg_out_prefix_align(s);
44
+ tcg_out32(s, p);
45
+ tcg_out32(s, i);
46
+}
47
+
48
/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
49
static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
50
unsigned ra, tcg_target_long imm, bool r)
51
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
52
break;
53
}
54
55
+ /* For unaligned or large offsets, use the prefixed form. */
56
+ if (have_isa_3_10
57
+ && (offset != (int16_t)offset || (offset & align))
58
+ && offset == sextract64(offset, 0, 34)) {
59
+ /*
60
+ * Note that the MLS:D insns retain their un-prefixed opcode,
61
+ * while the 8LS:D insns use a different opcode space.
62
+ */
63
+ switch (opi) {
64
+ case LBZ:
65
+ case LHZ:
66
+ case LHA:
67
+ case LWZ:
68
+ case STB:
69
+ case STH:
70
+ case STW:
71
+ case ADDI:
72
+ tcg_out_mls_d(s, opi, rt, base, offset, 0);
73
+ return;
74
+ case LWA:
75
+ tcg_out_8ls_d(s, PLWA, rt, base, offset, 0);
76
+ return;
77
+ case LD:
78
+ tcg_out_8ls_d(s, PLD, rt, base, offset, 0);
79
+ return;
80
+ case STD:
81
+ tcg_out_8ls_d(s, PSTD, rt, base, offset, 0);
82
+ return;
83
+ case LXSD:
84
+ tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0);
85
+ return;
86
+ case STXSD:
87
+ tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0);
88
+ return;
89
+ case LXV:
90
+ tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0);
91
+ return;
92
+ case STXV:
93
+ tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0);
94
+ return;
95
+ }
96
+ }
97
+
98
/* For unaligned, or very large offsets, use the indexed form. */
99
if (offset & align || offset != (int32_t)offset || opi == 0) {
100
if (rs == base) {
101
--
102
2.34.1
The prefixed instruction has a pc-relative form to use here.
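The new R_PPC64_PCREL34 relocation spreads a signed 34-bit displacement
over the two words of a prefixed instruction, 18 bits in the prefix and
16 in the suffix, exactly as reloc_pc34() patches them below. A
standalone sketch of that split and its reassembly (opcode bits left as
zero):

    #include <assert.h>
    #include <stdint.h>

    static void patch_pcrel34(uint32_t insn[2], int64_t disp)
    {
        insn[0] = (insn[0] & ~0x3ffffu) | (((uint64_t)disp >> 16) & 0x3ffff);
        insn[1] = (insn[1] & ~0xffffu) | ((uint64_t)disp & 0xffff);
    }

    static int64_t read_pcrel34(const uint32_t insn[2])
    {
        uint64_t raw = ((uint64_t)(insn[0] & 0x3ffff) << 16) | (insn[1] & 0xffff);
        return (int64_t)raw - ((raw >> 33) ? (INT64_C(1) << 34) : 0);
    }

    int main(void)
    {
        uint32_t insn[2] = { 0, 0 };
        int64_t disp = INT64_C(-0x12345678);

        patch_pcrel34(insn, disp);
        assert(read_pcrel34(insn) == disp);
        return 0;
    }
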
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/ppc/tcg-target.c.inc
11
+++ b/tcg/ppc/tcg-target.c.inc
12
@@ -XXX,XX +XXX,XX @@
13
#define ALL_GENERAL_REGS 0xffffffffu
14
#define ALL_VECTOR_REGS 0xffffffff00000000ull
15
16
+#ifndef R_PPC64_PCREL34
17
+#define R_PPC64_PCREL34 132
18
+#endif
19
+
20
#define have_isel (cpuinfo & CPUINFO_ISEL)
21
22
#ifndef CONFIG_SOFTMMU
23
@@ -XXX,XX +XXX,XX @@ static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
24
return false;
25
}
26
27
+static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
28
+{
29
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
30
+ ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
31
+
32
+ if (disp == sextract64(disp, 0, 34)) {
33
+ src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff);
34
+ src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff);
35
+ return true;
36
+ }
37
+ return false;
38
+}
39
+
40
/* test if a constant matches the constraint */
41
static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
42
{
43
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
44
return reloc_pc14(code_ptr, target);
45
case R_PPC_REL24:
46
return reloc_pc24(code_ptr, target);
47
+ case R_PPC64_PCREL34:
48
+ return reloc_pc34(code_ptr, target);
49
case R_PPC_ADDR16:
50
/*
51
* We are (slightly) abusing this relocation type. In particular,
52
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
53
tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
54
return;
55
}
56
+ if (have_isa_3_10) {
57
+ tcg_out_8ls_d(s, PLD, ret, 0, 0, 1);
58
+ new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0);
59
+ return;
60
+ }
61
if (have_isa_3_00) {
62
tcg_out_addpcis(s, TCG_REG_TMP2, 0);
63
new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0);
64
--
65
2.34.1
The prefixed instructions have a pc-relative form to use here.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/ppc/tcg-target.c.inc
11
+++ b/tcg/ppc/tcg-target.c.inc
12
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
13
if (USE_REG_TB) {
14
rel = R_PPC_ADDR16;
15
add = ppc_tbrel_diff(s, NULL);
16
+ } else if (have_isa_3_10) {
17
+ if (type == TCG_TYPE_V64) {
18
+ tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1);
19
+ new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0);
20
+ } else {
21
+ tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1);
22
+ new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val);
23
+ }
24
+ return;
25
} else if (have_isa_3_00) {
26
tcg_out_addpcis(s, TCG_REG_TMP1, 0);
27
rel = R_PPC_REL14;
28
--
29
2.34.1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 3 +++
1 file changed, 3 insertions(+)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
7
index XXXXXXX..XXXXXXX 100644
8
--- a/tcg/ppc/tcg-target.c.inc
9
+++ b/tcg/ppc/tcg-target.c.inc
10
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_tb(TCGContext *s, int which)
11
if (USE_REG_TB) {
12
ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr);
13
tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
14
+ } else if (have_isa_3_10) {
15
+ ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
16
+ tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
17
} else if (have_isa_3_00) {
18
ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4;
19
lo = offset;
20
--
21
2.34.1
This appears to slightly improve performance on power9/10.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/ppc/tcg-target.c.inc
11
+++ b/tcg/ppc/tcg-target.c.inc
12
@@ -XXX,XX +XXX,XX @@
13
#define TCG_VEC_TMP2 TCG_REG_V1
14
15
#define TCG_REG_TB TCG_REG_R31
16
-#define USE_REG_TB (TCG_TARGET_REG_BITS == 64)
17
+#define USE_REG_TB (TCG_TARGET_REG_BITS == 64 && !have_isa_3_00)
18
19
/* Shorthand for size of a pointer. Avoid promotion to unsigned. */
20
#define SZP ((int)sizeof(void *))
21
--
22
2.34.1
Begin disconnecting CONFIG_SOFTMMU from !CONFIG_USER_ONLY.
Introduce a variable which can be set at startup to select
one method or another for user-only.
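The idea is that the same symbol is a runtime flag in user-only builds
and a compile-time constant in system builds, so 'if (tcg_use_softmmu)'
costs nothing there; a minimal sketch of the idiom (names simplified,
not the actual QEMU headers):

    #include <stdbool.h>
    #include <stdio.h>

    #ifdef CONFIG_USER_ONLY
    bool tcg_use_softmmu;            /* chosen at startup for user-only */
    #else
    #define tcg_use_softmmu true     /* system mode: folded to a constant */
    #endif

    static void emit_qemu_ld(void)
    {
        if (tcg_use_softmmu) {
            puts("emit TLB lookup and slow path");
        } else {
            puts("emit guest_base-relative access");
        }
    }

    int main(void)
    {
    #ifdef CONFIG_USER_ONLY
        tcg_use_softmmu = false;     /* e.g. selected from the command line */
    #endif
        emit_qemu_ld();
        return 0;
    }
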
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/tcg/tcg.h | 8 ++++++--
tcg/tcg-op-ldst.c | 14 +++++++-------
tcg/tcg.c | 9 ++++++---
3 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/tcg/tcg.h
16
+++ b/include/tcg/tcg.h
17
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
18
int nb_ops;
19
TCGType addr_type; /* TCG_TYPE_I32 or TCG_TYPE_I64 */
20
21
-#ifdef CONFIG_SOFTMMU
22
int page_mask;
23
uint8_t page_bits;
24
uint8_t tlb_dyn_max_bits;
25
-#endif
26
uint8_t insn_start_words;
27
TCGBar guest_mo;
28
29
@@ -XXX,XX +XXX,XX @@ static inline bool temp_readonly(TCGTemp *ts)
30
return ts->kind >= TEMP_FIXED;
31
}
32
33
+#ifdef CONFIG_USER_ONLY
34
+extern bool tcg_use_softmmu;
35
+#else
36
+#define tcg_use_softmmu true
37
+#endif
38
+
39
extern __thread TCGContext *tcg_ctx;
40
extern const void *tcg_code_gen_epilogue;
41
extern uintptr_t tcg_splitwx_diff;
42
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/tcg/tcg-op-ldst.c
45
+++ b/tcg/tcg-op-ldst.c
46
@@ -XXX,XX +XXX,XX @@
47
48
static void check_max_alignment(unsigned a_bits)
49
{
50
-#if defined(CONFIG_SOFTMMU)
51
/*
52
* The requested alignment cannot overlap the TLB flags.
53
* FIXME: Must keep the count up-to-date with "exec/cpu-all.h".
54
*/
55
- tcg_debug_assert(a_bits + 5 <= tcg_ctx->page_bits);
56
-#endif
57
+ if (tcg_use_softmmu) {
58
+ tcg_debug_assert(a_bits + 5 <= tcg_ctx->page_bits);
59
+ }
60
}
61
62
static MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
63
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
64
*/
65
static bool use_two_i64_for_i128(MemOp mop)
66
{
67
-#ifdef CONFIG_SOFTMMU
68
/* Two softmmu tlb lookups is larger than one function call. */
69
- return false;
70
-#else
71
+ if (tcg_use_softmmu) {
72
+ return false;
73
+ }
74
+
75
/*
76
* For user-only, two 64-bit operations may well be smaller than a call.
77
* Determine if that would be legal for the requested atomicity.
78
@@ -XXX,XX +XXX,XX @@ static bool use_two_i64_for_i128(MemOp mop)
79
default:
80
g_assert_not_reached();
81
}
82
-#endif
83
}
84
85
static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
86
diff --git a/tcg/tcg.c b/tcg/tcg.c
87
index XXXXXXX..XXXXXXX 100644
88
--- a/tcg/tcg.c
89
+++ b/tcg/tcg.c
90
@@ -XXX,XX +XXX,XX @@ static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
91
MemOp host_atom, bool allow_two_ops)
92
__attribute__((unused));
93
94
+#ifdef CONFIG_USER_ONLY
95
+bool tcg_use_softmmu;
96
+#endif
97
+
98
TCGContext tcg_init_ctx;
99
__thread TCGContext *tcg_ctx;
100
101
@@ -XXX,XX +XXX,XX @@ static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
102
return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
103
}
104
105
-#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
106
-static int tlb_mask_table_ofs(TCGContext *s, int which)
107
+static int __attribute__((unused))
108
+tlb_mask_table_ofs(TCGContext *s, int which)
109
{
110
return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
111
sizeof(CPUNegativeOffsetState));
112
}
113
-#endif
114
115
/* Signal overflow, starting over with fewer guest insns. */
116
static G_NORETURN
117
--
118
2.34.1
119
120
Provide a define to allow !tcg_use_softmmu code paths to
compile in system mode, but require elimination.
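A rough standalone analogue of the trick, with the role of
qemu_build_not_reached() played by an undefined function reference; the
dead branch must be optimized away, so the sketch only links with -O1 or
higher (it uses a GCC statement expression, as the patch does):

    #include <stdio.h>

    void link_error_guest_base_used(void);   /* deliberately never defined */

    #define tcg_use_softmmu true
    #define guest_base ({ link_error_guest_base_used(); (unsigned long)0; })

    int main(void)
    {
        if (tcg_use_softmmu) {
            puts("softmmu path");
        } else {
            /* Dead code: the compiler must eliminate this branch, otherwise the
             * undefined reference above fails the link -- the same job done by
             * qemu_build_not_reached() in the patch below. */
            printf("guest_base = %lu\n", guest_base);
        }
        return 0;
    }
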
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tcg.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/tcg/tcg.c b/tcg/tcg.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/tcg.c
13
+++ b/tcg/tcg.c
14
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece);
15
static int tcg_out_ldst_finalize(TCGContext *s);
16
#endif
17
18
+#ifndef CONFIG_USER_ONLY
19
+#define guest_base ({ qemu_build_not_reached(); (uintptr_t)0; })
20
+#endif
21
+
22
typedef struct TCGLdstHelperParam {
23
TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
24
unsigned ntmp;
25
--
26
2.34.1
27
28
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/arm/tcg-target.c.inc | 203 +++++++++++++++++++--------------------
1 file changed, 97 insertions(+), 106 deletions(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/arm/tcg-target.c.inc
10
+++ b/tcg/arm/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
12
13
#define TCG_REG_TMP TCG_REG_R12
14
#define TCG_VEC_TMP TCG_REG_Q15
15
-#ifndef CONFIG_SOFTMMU
16
#define TCG_REG_GUEST_BASE TCG_REG_R11
17
-#endif
18
19
typedef enum {
20
COND_EQ = 0x0,
21
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
22
* r0-r3 will be overwritten when reading the tlb entry (system-mode only);
23
* r14 will be overwritten by the BLNE branching to the slow path.
24
*/
25
-#ifdef CONFIG_SOFTMMU
26
#define ALL_QLDST_REGS \
27
- (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
28
- (1 << TCG_REG_R2) | (1 << TCG_REG_R3) | \
29
- (1 << TCG_REG_R14)))
30
-#else
31
-#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_R14))
32
-#endif
33
+ (ALL_GENERAL_REGS & ~((tcg_use_softmmu ? 0xf : 0) | (1 << TCG_REG_R14)))
34
35
/*
36
* ARM immediates for ALU instructions are made of an unsigned 8-bit
37
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
38
MemOp opc = get_memop(oi);
39
unsigned a_mask;
40
41
-#ifdef CONFIG_SOFTMMU
42
- *h = (HostAddress){
43
- .cond = COND_AL,
44
- .base = addrlo,
45
- .index = TCG_REG_R1,
46
- .index_scratch = true,
47
- };
48
-#else
49
- *h = (HostAddress){
50
- .cond = COND_AL,
51
- .base = addrlo,
52
- .index = guest_base ? TCG_REG_GUEST_BASE : -1,
53
- .index_scratch = false,
54
- };
55
-#endif
56
+ if (tcg_use_softmmu) {
57
+ *h = (HostAddress){
58
+ .cond = COND_AL,
59
+ .base = addrlo,
60
+ .index = TCG_REG_R1,
61
+ .index_scratch = true,
62
+ };
63
+ } else {
64
+ *h = (HostAddress){
65
+ .cond = COND_AL,
66
+ .base = addrlo,
67
+ .index = guest_base ? TCG_REG_GUEST_BASE : -1,
68
+ .index_scratch = false,
69
+ };
70
+ }
71
72
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
73
a_mask = (1 << h->aa.align) - 1;
74
75
-#ifdef CONFIG_SOFTMMU
76
- int mem_index = get_mmuidx(oi);
77
- int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
78
- : offsetof(CPUTLBEntry, addr_write);
79
- int fast_off = tlb_mask_table_ofs(s, mem_index);
80
- unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
81
- TCGReg t_addr;
82
+ if (tcg_use_softmmu) {
83
+ int mem_index = get_mmuidx(oi);
84
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
85
+ : offsetof(CPUTLBEntry, addr_write);
86
+ int fast_off = tlb_mask_table_ofs(s, mem_index);
87
+ unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
88
+ TCGReg t_addr;
89
90
- ldst = new_ldst_label(s);
91
- ldst->is_ld = is_ld;
92
- ldst->oi = oi;
93
- ldst->addrlo_reg = addrlo;
94
- ldst->addrhi_reg = addrhi;
95
+ ldst = new_ldst_label(s);
96
+ ldst->is_ld = is_ld;
97
+ ldst->oi = oi;
98
+ ldst->addrlo_reg = addrlo;
99
+ ldst->addrhi_reg = addrhi;
100
101
- /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {r0,r1}. */
102
- QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
103
- QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
104
- tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
105
+ /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {r0,r1}. */
106
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
107
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
108
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
109
110
- /* Extract the tlb index from the address into R0. */
111
- tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
112
- SHIFT_IMM_LSR(s->page_bits - CPU_TLB_ENTRY_BITS));
113
+ /* Extract the tlb index from the address into R0. */
114
+ tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
115
+ SHIFT_IMM_LSR(s->page_bits - CPU_TLB_ENTRY_BITS));
116
117
- /*
118
- * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
119
- * Load the tlb comparator into R2/R3 and the fast path addend into R1.
120
- */
121
- QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
122
- if (cmp_off == 0) {
123
- if (s->addr_type == TCG_TYPE_I32) {
124
- tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
125
+ /*
126
+ * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
127
+ * Load the tlb comparator into R2/R3 and the fast path addend into R1.
128
+ */
129
+ QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
130
+ if (cmp_off == 0) {
131
+ if (s->addr_type == TCG_TYPE_I32) {
132
+ tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2,
133
+ TCG_REG_R1, TCG_REG_R0);
134
+ } else {
135
+ tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2,
136
+ TCG_REG_R1, TCG_REG_R0);
137
+ }
138
} else {
139
- tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
140
+ tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
141
+ TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
142
+ if (s->addr_type == TCG_TYPE_I32) {
143
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
144
+ } else {
145
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
146
+ }
147
}
148
- } else {
149
- tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
150
- TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
151
- if (s->addr_type == TCG_TYPE_I32) {
152
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
153
+
154
+ /* Load the tlb addend. */
155
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
156
+ offsetof(CPUTLBEntry, addend));
157
+
158
+ /*
159
+ * Check alignment, check comparators.
160
+ * Do this in 2-4 insns. Use MOVW for v7, if possible,
161
+ * to reduce the number of sequential conditional instructions.
162
+ * Almost all guests have at least 4k pages, which means that we need
163
+ * to clear at least 9 bits even for an 8-byte memory, which means it
164
+ * isn't worth checking for an immediate operand for BIC.
165
+ *
166
+ * For unaligned accesses, test the page of the last unit of alignment.
167
+ * This leaves the least significant alignment bits unchanged, and of
168
+ * course must be zero.
169
+ */
170
+ t_addr = addrlo;
171
+ if (a_mask < s_mask) {
172
+ t_addr = TCG_REG_R0;
173
+ tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
174
+ addrlo, s_mask - a_mask);
175
+ }
176
+ if (use_armv7_instructions && s->page_bits <= 16) {
177
+ tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(s->page_mask | a_mask));
178
+ tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
179
+ t_addr, TCG_REG_TMP, 0);
180
+ tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
181
+ TCG_REG_R2, TCG_REG_TMP, 0);
182
} else {
183
- tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
184
+ if (a_mask) {
185
+ tcg_debug_assert(a_mask <= 0xff);
186
+ tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
187
+ }
188
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
189
+ SHIFT_IMM_LSR(s->page_bits));
190
+ tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
191
+ 0, TCG_REG_R2, TCG_REG_TMP,
192
+ SHIFT_IMM_LSL(s->page_bits));
193
}
194
- }
195
196
- /* Load the tlb addend. */
197
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
198
- offsetof(CPUTLBEntry, addend));
199
-
200
- /*
201
- * Check alignment, check comparators.
202
- * Do this in 2-4 insns. Use MOVW for v7, if possible,
203
- * to reduce the number of sequential conditional instructions.
204
- * Almost all guests have at least 4k pages, which means that we need
205
- * to clear at least 9 bits even for an 8-byte memory, which means it
206
- * isn't worth checking for an immediate operand for BIC.
207
- *
208
- * For unaligned accesses, test the page of the last unit of alignment.
209
- * This leaves the least significant alignment bits unchanged, and of
210
- * course must be zero.
211
- */
212
- t_addr = addrlo;
213
- if (a_mask < s_mask) {
214
- t_addr = TCG_REG_R0;
215
- tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
216
- addrlo, s_mask - a_mask);
217
- }
218
- if (use_armv7_instructions && s->page_bits <= 16) {
219
- tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(s->page_mask | a_mask));
220
- tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
221
- t_addr, TCG_REG_TMP, 0);
222
- tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
223
- } else {
224
- if (a_mask) {
225
- tcg_debug_assert(a_mask <= 0xff);
226
- tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
227
+ if (s->addr_type != TCG_TYPE_I32) {
228
+ tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
229
}
230
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
231
- SHIFT_IMM_LSR(s->page_bits));
232
- tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
233
- 0, TCG_REG_R2, TCG_REG_TMP,
234
- SHIFT_IMM_LSL(s->page_bits));
235
- }
236
-
237
- if (s->addr_type != TCG_TYPE_I32) {
238
- tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
239
- }
240
-#else
241
- if (a_mask) {
242
+ } else if (a_mask) {
243
ldst = new_ldst_label(s);
244
ldst->is_ld = is_ld;
245
ldst->oi = oi;
246
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
247
/* tst addr, #mask */
248
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
249
}
250
-#endif
251
252
return ldst;
253
}
254
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
255
256
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
257
258
-#ifndef CONFIG_SOFTMMU
259
- if (guest_base) {
260
+ if (!tcg_use_softmmu && guest_base) {
261
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
262
tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
263
}
264
-#endif
265
266
tcg_out_b_reg(s, COND_AL, tcg_target_call_iarg_regs[1]);
267
268
--
269
2.34.1
270
271
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/aarch64/tcg-target.c.inc | 177 +++++++++++++++++------------------
1 file changed, 88 insertions(+), 89 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/aarch64/tcg-target.c.inc
10
+++ b/tcg/aarch64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
12
#define TCG_REG_TMP2 TCG_REG_X30
13
#define TCG_VEC_TMP0 TCG_REG_V31
14
15
-#ifndef CONFIG_SOFTMMU
16
#define TCG_REG_GUEST_BASE TCG_REG_X28
17
-#endif
18
19
static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
20
{
21
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
22
s_bits == MO_128);
23
a_mask = (1 << h->aa.align) - 1;
24
25
-#ifdef CONFIG_SOFTMMU
26
- unsigned s_mask = (1u << s_bits) - 1;
27
- unsigned mem_index = get_mmuidx(oi);
28
- TCGReg addr_adj;
29
- TCGType mask_type;
30
- uint64_t compare_mask;
31
+ if (tcg_use_softmmu) {
32
+ unsigned s_mask = (1u << s_bits) - 1;
33
+ unsigned mem_index = get_mmuidx(oi);
34
+ TCGReg addr_adj;
35
+ TCGType mask_type;
36
+ uint64_t compare_mask;
37
38
- ldst = new_ldst_label(s);
39
- ldst->is_ld = is_ld;
40
- ldst->oi = oi;
41
- ldst->addrlo_reg = addr_reg;
42
-
43
- mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
44
- ? TCG_TYPE_I64 : TCG_TYPE_I32);
45
-
46
- /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
47
- QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
48
- QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
49
- tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
50
- tlb_mask_table_ofs(s, mem_index), 1, 0);
51
-
52
- /* Extract the TLB index from the address into X0. */
53
- tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
54
- TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
55
- s->page_bits - CPU_TLB_ENTRY_BITS);
56
-
57
- /* Add the tlb_table pointer, forming the CPUTLBEntry address in TMP1. */
58
- tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
59
-
60
- /* Load the tlb comparator into TMP0, and the fast path addend into TMP1. */
61
- QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
62
- tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
63
- is_ld ? offsetof(CPUTLBEntry, addr_read)
64
- : offsetof(CPUTLBEntry, addr_write));
65
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
66
- offsetof(CPUTLBEntry, addend));
67
-
68
- /*
69
- * For aligned accesses, we check the first byte and include the alignment
70
- * bits within the address. For unaligned access, we check that we don't
71
- * cross pages using the address of the last byte of the access.
72
- */
73
- if (a_mask >= s_mask) {
74
- addr_adj = addr_reg;
75
- } else {
76
- addr_adj = TCG_REG_TMP2;
77
- tcg_out_insn(s, 3401, ADDI, addr_type,
78
- addr_adj, addr_reg, s_mask - a_mask);
79
- }
80
- compare_mask = (uint64_t)s->page_mask | a_mask;
81
-
82
- /* Store the page mask part of the address into TMP2. */
83
- tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
84
- addr_adj, compare_mask);
85
-
86
- /* Perform the address comparison. */
87
- tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);
88
-
89
- /* If not equal, we jump to the slow path. */
90
- ldst->label_ptr[0] = s->code_ptr;
91
- tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
92
-
93
- h->base = TCG_REG_TMP1;
94
- h->index = addr_reg;
95
- h->index_ext = addr_type;
96
-#else
97
- if (a_mask) {
98
ldst = new_ldst_label(s);
99
-
100
ldst->is_ld = is_ld;
101
ldst->oi = oi;
102
ldst->addrlo_reg = addr_reg;
103
104
- /* tst addr, #mask */
105
- tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
106
+ mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
107
+ ? TCG_TYPE_I64 : TCG_TYPE_I32);
108
109
- /* b.ne slow_path */
110
+ /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
111
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
112
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
113
+ tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
114
+ tlb_mask_table_ofs(s, mem_index), 1, 0);
115
+
116
+ /* Extract the TLB index from the address into X0. */
117
+ tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
118
+ TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
119
+ s->page_bits - CPU_TLB_ENTRY_BITS);
120
+
121
+ /* Add the tlb_table pointer, forming the CPUTLBEntry address. */
122
+ tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
123
+
124
+ /* Load the tlb comparator into TMP0, and the fast path addend. */
125
+ QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
126
+ tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
127
+ is_ld ? offsetof(CPUTLBEntry, addr_read)
128
+ : offsetof(CPUTLBEntry, addr_write));
129
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
130
+ offsetof(CPUTLBEntry, addend));
131
+
132
+ /*
133
+ * For aligned accesses, we check the first byte and include
134
+ * the alignment bits within the address. For unaligned access,
135
+ * we check that we don't cross pages using the address of the
136
+ * last byte of the access.
137
+ */
138
+ if (a_mask >= s_mask) {
139
+ addr_adj = addr_reg;
140
+ } else {
141
+ addr_adj = TCG_REG_TMP2;
142
+ tcg_out_insn(s, 3401, ADDI, addr_type,
143
+ addr_adj, addr_reg, s_mask - a_mask);
144
+ }
145
+ compare_mask = (uint64_t)s->page_mask | a_mask;
146
+
147
+ /* Store the page mask part of the address into TMP2. */
148
+ tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
149
+ addr_adj, compare_mask);
150
+
151
+ /* Perform the address comparison. */
152
+ tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);
153
+
154
+ /* If not equal, we jump to the slow path. */
155
ldst->label_ptr[0] = s->code_ptr;
156
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
157
- }
158
159
- if (guest_base || addr_type == TCG_TYPE_I32) {
160
- h->base = TCG_REG_GUEST_BASE;
161
+ h->base = TCG_REG_TMP1;
162
h->index = addr_reg;
163
h->index_ext = addr_type;
164
} else {
165
- h->base = addr_reg;
166
- h->index = TCG_REG_XZR;
167
- h->index_ext = TCG_TYPE_I64;
168
+ if (a_mask) {
169
+ ldst = new_ldst_label(s);
170
+
171
+ ldst->is_ld = is_ld;
172
+ ldst->oi = oi;
173
+ ldst->addrlo_reg = addr_reg;
174
+
175
+ /* tst addr, #mask */
176
+ tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
177
+
178
+ /* b.ne slow_path */
179
+ ldst->label_ptr[0] = s->code_ptr;
180
+ tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
181
+ }
182
+
183
+ if (guest_base || addr_type == TCG_TYPE_I32) {
184
+ h->base = TCG_REG_GUEST_BASE;
185
+ h->index = addr_reg;
186
+ h->index_ext = addr_type;
187
+ } else {
188
+ h->base = addr_reg;
189
+ h->index = TCG_REG_XZR;
190
+ h->index_ext = TCG_TYPE_I64;
191
+ }
192
}
193
-#endif
194
195
return ldst;
196
}
197
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
198
tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
199
CPU_TEMP_BUF_NLONGS * sizeof(long));
200
201
-#if !defined(CONFIG_SOFTMMU)
202
- /*
203
- * Note that XZR cannot be encoded in the address base register slot,
204
- * as that actually encodes SP. Depending on the guest, we may need
205
- * to zero-extend the guest address via the address index register slot,
206
- * therefore we need to load even a zero guest base into a register.
207
- */
208
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
209
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
210
-#endif
211
+ if (!tcg_use_softmmu) {
212
+ /*
213
+ * Note that XZR cannot be encoded in the address base register slot,
214
+ * as that actually encodes SP. Depending on the guest, we may need
215
+ * to zero-extend the guest address via the address index register slot,
216
+ * therefore we need to load even a zero guest base into a register.
217
+ */
218
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
219
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
220
+ }
221
222
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
223
tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
224
--
225
2.34.1
226
227
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/i386/tcg-target.c.inc | 198 +++++++++++++++++++-------------------
1 file changed, 98 insertions(+), 100 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/i386/tcg-target.c.inc
10
+++ b/tcg/i386/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
12
# define ALL_VECTOR_REGS 0x00ff0000u
13
# define ALL_BYTEL_REGS 0x0000000fu
14
#endif
15
-#ifdef CONFIG_SOFTMMU
16
-# define SOFTMMU_RESERVE_REGS ((1 << TCG_REG_L0) | (1 << TCG_REG_L1))
17
-#else
18
-# define SOFTMMU_RESERVE_REGS 0
19
-#endif
20
+#define SOFTMMU_RESERVE_REGS \
21
+ (tcg_use_softmmu ? (1 << TCG_REG_L0) | (1 << TCG_REG_L1) : 0)
22
23
/* For 64-bit, we always know that CMOV is available. */
24
#if TCG_TARGET_REG_BITS == 64
25
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
26
return true;
27
}
28
29
-#ifndef CONFIG_SOFTMMU
30
+#ifdef CONFIG_USER_ONLY
31
static HostAddress x86_guest_base = {
32
.index = -1
33
};
34
@@ -XXX,XX +XXX,XX @@ static inline int setup_guest_base_seg(void)
35
}
36
return 0;
37
}
38
+#define setup_guest_base_seg setup_guest_base_seg
39
#elif defined(__x86_64__) && \
40
(defined (__FreeBSD__) || defined (__FreeBSD_kernel__))
41
# include <machine/sysarch.h>
42
@@ -XXX,XX +XXX,XX @@ static inline int setup_guest_base_seg(void)
43
}
44
return 0;
45
}
46
+#define setup_guest_base_seg setup_guest_base_seg
47
+#endif
48
#else
49
-static inline int setup_guest_base_seg(void)
50
-{
51
- return 0;
52
-}
53
-#endif /* setup_guest_base_seg */
54
-#endif /* !SOFTMMU */
55
+# define x86_guest_base (*(HostAddress *)({ qemu_build_not_reached(); NULL; }))
56
+#endif /* CONFIG_USER_ONLY */
57
+#ifndef setup_guest_base_seg
58
+# define setup_guest_base_seg() 0
59
+#endif
60
61
#define MIN_TLB_MASK_TABLE_OFS INT_MIN
62
63
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
64
MemOp s_bits = opc & MO_SIZE;
65
unsigned a_mask;
66
67
-#ifdef CONFIG_SOFTMMU
68
- h->index = TCG_REG_L0;
69
- h->ofs = 0;
70
- h->seg = 0;
71
-#else
72
- *h = x86_guest_base;
73
-#endif
74
+ if (tcg_use_softmmu) {
75
+ h->index = TCG_REG_L0;
76
+ h->ofs = 0;
77
+ h->seg = 0;
78
+ } else {
79
+ *h = x86_guest_base;
80
+ }
81
h->base = addrlo;
82
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
83
a_mask = (1 << h->aa.align) - 1;
84
85
-#ifdef CONFIG_SOFTMMU
86
- int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read)
87
- : offsetof(CPUTLBEntry, addr_write);
88
- TCGType ttype = TCG_TYPE_I32;
89
- TCGType tlbtype = TCG_TYPE_I32;
90
- int trexw = 0, hrexw = 0, tlbrexw = 0;
91
- unsigned mem_index = get_mmuidx(oi);
92
- unsigned s_mask = (1 << s_bits) - 1;
93
- int fast_ofs = tlb_mask_table_ofs(s, mem_index);
94
- int tlb_mask;
95
+ if (tcg_use_softmmu) {
96
+ int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read)
97
+ : offsetof(CPUTLBEntry, addr_write);
98
+ TCGType ttype = TCG_TYPE_I32;
99
+ TCGType tlbtype = TCG_TYPE_I32;
100
+ int trexw = 0, hrexw = 0, tlbrexw = 0;
101
+ unsigned mem_index = get_mmuidx(oi);
102
+ unsigned s_mask = (1 << s_bits) - 1;
103
+ int fast_ofs = tlb_mask_table_ofs(s, mem_index);
104
+ int tlb_mask;
105
106
- ldst = new_ldst_label(s);
107
- ldst->is_ld = is_ld;
108
- ldst->oi = oi;
109
- ldst->addrlo_reg = addrlo;
110
- ldst->addrhi_reg = addrhi;
111
+ ldst = new_ldst_label(s);
112
+ ldst->is_ld = is_ld;
113
+ ldst->oi = oi;
114
+ ldst->addrlo_reg = addrlo;
115
+ ldst->addrhi_reg = addrhi;
116
117
- if (TCG_TARGET_REG_BITS == 64) {
118
- ttype = s->addr_type;
119
- trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW);
120
- if (TCG_TYPE_PTR == TCG_TYPE_I64) {
121
- hrexw = P_REXW;
122
- if (s->page_bits + s->tlb_dyn_max_bits > 32) {
123
- tlbtype = TCG_TYPE_I64;
124
- tlbrexw = P_REXW;
125
+ if (TCG_TARGET_REG_BITS == 64) {
126
+ ttype = s->addr_type;
127
+ trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW);
128
+ if (TCG_TYPE_PTR == TCG_TYPE_I64) {
129
+ hrexw = P_REXW;
130
+ if (s->page_bits + s->tlb_dyn_max_bits > 32) {
131
+ tlbtype = TCG_TYPE_I64;
132
+ tlbrexw = P_REXW;
133
+ }
134
}
135
}
136
- }
137
138
- tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
139
- tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
140
- s->page_bits - CPU_TLB_ENTRY_BITS);
141
+ tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
142
+ tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
143
+ s->page_bits - CPU_TLB_ENTRY_BITS);
144
145
- tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
146
- fast_ofs + offsetof(CPUTLBDescFast, mask));
147
+ tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
148
+ fast_ofs + offsetof(CPUTLBDescFast, mask));
149
150
- tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0,
151
- fast_ofs + offsetof(CPUTLBDescFast, table));
152
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0,
153
+ fast_ofs + offsetof(CPUTLBDescFast, table));
154
155
- /*
156
- * If the required alignment is at least as large as the access, simply
157
- * copy the address and mask. For lesser alignments, check that we don't
158
- * cross pages for the complete access.
159
- */
160
- if (a_mask >= s_mask) {
161
- tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
162
- } else {
163
- tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
164
- addrlo, s_mask - a_mask);
165
- }
166
- tlb_mask = s->page_mask | a_mask;
167
- tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
168
+ /*
169
+ * If the required alignment is at least as large as the access,
170
+ * simply copy the address and mask. For lesser alignments,
171
+ * check that we don't cross pages for the complete access.
172
+ */
173
+ if (a_mask >= s_mask) {
174
+ tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
175
+ } else {
176
+ tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
177
+ addrlo, s_mask - a_mask);
178
+ }
179
+ tlb_mask = s->page_mask | a_mask;
180
+ tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
181
182
- /* cmp 0(TCG_REG_L0), TCG_REG_L1 */
183
- tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
184
- TCG_REG_L1, TCG_REG_L0, cmp_ofs);
185
-
186
- /* jne slow_path */
187
- tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
188
- ldst->label_ptr[0] = s->code_ptr;
189
- s->code_ptr += 4;
190
-
191
- if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I64) {
192
- /* cmp 4(TCG_REG_L0), addrhi */
193
- tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, cmp_ofs + 4);
194
+ /* cmp 0(TCG_REG_L0), TCG_REG_L1 */
195
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
196
+ TCG_REG_L1, TCG_REG_L0, cmp_ofs);
197
198
/* jne slow_path */
199
tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
200
- ldst->label_ptr[1] = s->code_ptr;
201
+ ldst->label_ptr[0] = s->code_ptr;
202
s->code_ptr += 4;
203
- }
204
205
- /* TLB Hit. */
206
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0,
207
- offsetof(CPUTLBEntry, addend));
208
-#else
209
- if (a_mask) {
210
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I64) {
211
+ /* cmp 4(TCG_REG_L0), addrhi */
212
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi,
213
+ TCG_REG_L0, cmp_ofs + 4);
214
+
215
+ /* jne slow_path */
216
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
217
+ ldst->label_ptr[1] = s->code_ptr;
218
+ s->code_ptr += 4;
219
+ }
220
+
221
+ /* TLB Hit. */
222
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0,
223
+ offsetof(CPUTLBEntry, addend));
224
+ } else if (a_mask) {
225
ldst = new_ldst_label(s);
226
227
ldst->is_ld = is_ld;
228
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
229
ldst->label_ptr[0] = s->code_ptr;
230
s->code_ptr += 4;
231
}
232
-#endif
233
234
return ldst;
235
}
236
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
237
tcg_out_push(s, tcg_target_callee_save_regs[i]);
238
}
239
240
-#if TCG_TARGET_REG_BITS == 32
241
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
242
- (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
243
- tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
244
- /* jmp *tb. */
245
- tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
246
- (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
247
- + stack_addend);
248
-#else
249
-# if !defined(CONFIG_SOFTMMU)
250
- if (guest_base) {
251
+ if (!tcg_use_softmmu && guest_base) {
252
int seg = setup_guest_base_seg();
253
if (seg != 0) {
254
x86_guest_base.seg = seg;
255
} else if (guest_base == (int32_t)guest_base) {
256
x86_guest_base.ofs = guest_base;
257
} else {
258
+ assert(TCG_TARGET_REG_BITS == 64);
259
/* Choose R12 because, as a base, it requires a SIB byte. */
260
x86_guest_base.index = TCG_REG_R12;
261
tcg_out_movi(s, TCG_TYPE_PTR, x86_guest_base.index, guest_base);
262
tcg_regset_set_reg(s->reserved_regs, x86_guest_base.index);
263
}
264
}
265
-# endif
266
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
267
- tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
268
- /* jmp *tb. */
269
- tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
270
-#endif
271
+
272
+ if (TCG_TARGET_REG_BITS == 32) {
273
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
274
+ (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
275
+ tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
276
+ /* jmp *tb. */
277
+ tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
278
+ (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
279
+ + stack_addend);
280
+ } else {
281
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
282
+ tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
283
+ /* jmp *tb. */
284
+ tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
285
+ }
286
287
/*
288
* Return path for goto_ptr. Set return value to 0, a-la exit_tb,
289
--
290
2.34.1
291
292
New patch
1
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/loongarch64/tcg-target.c.inc | 126 +++++++++++++++----------------
5
1 file changed, 61 insertions(+), 65 deletions(-)
1
6
7
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/loongarch64/tcg-target.c.inc
10
+++ b/tcg/loongarch64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
12
return TCG_REG_A0 + slot;
13
}
14
15
-#ifndef CONFIG_SOFTMMU
16
-#define USE_GUEST_BASE (guest_base != 0)
17
#define TCG_GUEST_BASE_REG TCG_REG_S1
18
-#endif
19
20
#define TCG_CT_CONST_ZERO 0x100
21
#define TCG_CT_CONST_S12 0x200
22
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
23
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
24
a_bits = h->aa.align;
25
26
-#ifdef CONFIG_SOFTMMU
27
- unsigned s_bits = opc & MO_SIZE;
28
- int mem_index = get_mmuidx(oi);
29
- int fast_ofs = tlb_mask_table_ofs(s, mem_index);
30
- int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
31
- int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
32
+ if (tcg_use_softmmu) {
33
+ unsigned s_bits = opc & MO_SIZE;
34
+ int mem_index = get_mmuidx(oi);
35
+ int fast_ofs = tlb_mask_table_ofs(s, mem_index);
36
+ int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
37
+ int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
38
39
- ldst = new_ldst_label(s);
40
- ldst->is_ld = is_ld;
41
- ldst->oi = oi;
42
- ldst->addrlo_reg = addr_reg;
43
-
44
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
45
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
46
-
47
- tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg,
48
- s->page_bits - CPU_TLB_ENTRY_BITS);
49
- tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
50
- tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
51
-
52
- /* Load the tlb comparator and the addend. */
53
- QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
54
- tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
55
- is_ld ? offsetof(CPUTLBEntry, addr_read)
56
- : offsetof(CPUTLBEntry, addr_write));
57
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
58
- offsetof(CPUTLBEntry, addend));
59
-
60
- /*
61
- * For aligned accesses, we check the first byte and include the alignment
62
- * bits within the address. For unaligned access, we check that we don't
63
- * cross pages using the address of the last byte of the access.
64
- */
65
- if (a_bits < s_bits) {
66
- unsigned a_mask = (1u << a_bits) - 1;
67
- unsigned s_mask = (1u << s_bits) - 1;
68
- tcg_out_addi(s, addr_type, TCG_REG_TMP1, addr_reg, s_mask - a_mask);
69
- } else {
70
- tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg);
71
- }
72
- tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO,
73
- a_bits, s->page_bits - 1);
74
-
75
- /* Compare masked address with the TLB entry. */
76
- ldst->label_ptr[0] = s->code_ptr;
77
- tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);
78
-
79
- h->index = TCG_REG_TMP2;
80
-#else
81
- if (a_bits) {
82
ldst = new_ldst_label(s);
83
-
84
ldst->is_ld = is_ld;
85
ldst->oi = oi;
86
ldst->addrlo_reg = addr_reg;
87
88
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
89
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
90
+
91
+ tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg,
92
+ s->page_bits - CPU_TLB_ENTRY_BITS);
93
+ tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
94
+ tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
95
+
96
+ /* Load the tlb comparator and the addend. */
97
+ QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
98
+ tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
99
+ is_ld ? offsetof(CPUTLBEntry, addr_read)
100
+ : offsetof(CPUTLBEntry, addr_write));
101
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
102
+ offsetof(CPUTLBEntry, addend));
103
+
104
/*
105
- * Without micro-architecture details, we don't know which of
106
- * bstrpick or andi is faster, so use bstrpick as it's not
107
- * constrained by imm field width. Not to say alignments >= 2^12
108
- * are going to happen any time soon.
109
+ * For aligned accesses, we check the first byte and include the
110
+ * alignment bits within the address. For unaligned access, we
111
+ * check that we don't cross pages using the address of the last
112
+ * byte of the access.
113
*/
114
- tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1);
115
+ if (a_bits < s_bits) {
116
+ unsigned a_mask = (1u << a_bits) - 1;
117
+ unsigned s_mask = (1u << s_bits) - 1;
118
+ tcg_out_addi(s, addr_type, TCG_REG_TMP1, addr_reg, s_mask - a_mask);
119
+ } else {
120
+ tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg);
121
+ }
122
+ tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO,
123
+ a_bits, s->page_bits - 1);
124
125
+ /* Compare masked address with the TLB entry. */
126
ldst->label_ptr[0] = s->code_ptr;
127
- tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0);
128
- }
129
+ tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);
130
131
- h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
132
-#endif
133
+ h->index = TCG_REG_TMP2;
134
+ } else {
135
+ if (a_bits) {
136
+ ldst = new_ldst_label(s);
137
+
138
+ ldst->is_ld = is_ld;
139
+ ldst->oi = oi;
140
+ ldst->addrlo_reg = addr_reg;
141
+
142
+ /*
143
+ * Without micro-architecture details, we don't know which of
144
+ * bstrpick or andi is faster, so use bstrpick as it's not
145
+ * constrained by imm field width. Not to say alignments >= 2^12
146
+ * are going to happen any time soon.
147
+ */
148
+ tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1);
149
+
150
+ ldst->label_ptr[0] = s->code_ptr;
151
+ tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0);
152
+ }
153
+
154
+ h->index = guest_base ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
155
+ }
156
157
if (addr_type == TCG_TYPE_I32) {
158
h->base = TCG_REG_TMP0;
159
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
160
TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
161
}
162
163
-#if !defined(CONFIG_SOFTMMU)
164
- if (USE_GUEST_BASE) {
165
+ if (!tcg_use_softmmu && guest_base) {
166
tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
167
tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
168
}
169
-#endif
170
171
/* Call generated code */
172
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
173
--
174
2.34.1
175
176
New patch
1
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/mips/tcg-target.c.inc | 231 +++++++++++++++++++-------------------
5
1 file changed, 113 insertions(+), 118 deletions(-)
1
6
7
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/mips/tcg-target.c.inc
10
+++ b/tcg/mips/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
12
#define TCG_TMP2 TCG_REG_T8
13
#define TCG_TMP3 TCG_REG_T7
14
15
-#ifndef CONFIG_SOFTMMU
16
#define TCG_GUEST_BASE_REG TCG_REG_S7
17
-#endif
18
#if TCG_TARGET_REG_BITS == 64
19
#define TCG_REG_TB TCG_REG_S6
20
#else
21
-#define TCG_REG_TB (qemu_build_not_reached(), TCG_REG_ZERO)
22
+#define TCG_REG_TB ({ qemu_build_not_reached(); TCG_REG_ZERO; })
23
#endif
24
25
/* check if we really need so many registers :P */
26
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
27
a_bits = h->aa.align;
28
a_mask = (1 << a_bits) - 1;
29
30
-#ifdef CONFIG_SOFTMMU
31
- unsigned s_mask = (1 << s_bits) - 1;
32
- int mem_index = get_mmuidx(oi);
33
- int fast_off = tlb_mask_table_ofs(s, mem_index);
34
- int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
35
- int table_off = fast_off + offsetof(CPUTLBDescFast, table);
36
- int add_off = offsetof(CPUTLBEntry, addend);
37
- int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
38
- : offsetof(CPUTLBEntry, addr_write);
39
+ if (tcg_use_softmmu) {
40
+ unsigned s_mask = (1 << s_bits) - 1;
41
+ int mem_index = get_mmuidx(oi);
42
+ int fast_off = tlb_mask_table_ofs(s, mem_index);
43
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
44
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
45
+ int add_off = offsetof(CPUTLBEntry, addend);
46
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
47
+ : offsetof(CPUTLBEntry, addr_write);
48
49
- ldst = new_ldst_label(s);
50
- ldst->is_ld = is_ld;
51
- ldst->oi = oi;
52
- ldst->addrlo_reg = addrlo;
53
- ldst->addrhi_reg = addrhi;
54
-
55
- /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
56
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
57
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
58
-
59
- /* Extract the TLB index from the address into TMP3. */
60
- if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
61
- tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, addrlo,
62
- s->page_bits - CPU_TLB_ENTRY_BITS);
63
- } else {
64
- tcg_out_dsrl(s, TCG_TMP3, addrlo,
65
- s->page_bits - CPU_TLB_ENTRY_BITS);
66
- }
67
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
68
-
69
- /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */
70
- tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
71
-
72
- if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
73
- /* Load the (low half) tlb comparator. */
74
- tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_TMP3,
75
- cmp_off + HOST_BIG_ENDIAN * 4);
76
- } else {
77
- tcg_out_ld(s, TCG_TYPE_I64, TCG_TMP0, TCG_TMP3, cmp_off);
78
- }
79
-
80
- if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
81
- /* Load the tlb addend for the fast path. */
82
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
83
- }
84
-
85
- /*
86
- * Mask the page bits, keeping the alignment bits to compare against.
87
- * For unaligned accesses, compare against the end of the access to
88
- * verify that it does not cross a page boundary.
89
- */
90
- tcg_out_movi(s, addr_type, TCG_TMP1, s->page_mask | a_mask);
91
- if (a_mask < s_mask) {
92
- if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
93
- tcg_out_opc_imm(s, OPC_ADDIU, TCG_TMP2, addrlo, s_mask - a_mask);
94
- } else {
95
- tcg_out_opc_imm(s, OPC_DADDIU, TCG_TMP2, addrlo, s_mask - a_mask);
96
- }
97
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
98
- } else {
99
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
100
- }
101
-
102
- /* Zero extend a 32-bit guest address for a 64-bit host. */
103
- if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
104
- tcg_out_ext32u(s, TCG_TMP2, addrlo);
105
- addrlo = TCG_TMP2;
106
- }
107
-
108
- ldst->label_ptr[0] = s->code_ptr;
109
- tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
110
-
111
- /* Load and test the high half tlb comparator. */
112
- if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
113
- /* delay slot */
114
- tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
115
-
116
- /* Load the tlb addend for the fast path. */
117
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
118
-
119
- ldst->label_ptr[1] = s->code_ptr;
120
- tcg_out_opc_br(s, OPC_BNE, addrhi, TCG_TMP0);
121
- }
122
-
123
- /* delay slot */
124
- base = TCG_TMP3;
125
- tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP3, addrlo);
126
-#else
127
- if (a_mask && (use_mips32r6_instructions || a_bits != s_bits)) {
128
ldst = new_ldst_label(s);
129
-
130
ldst->is_ld = is_ld;
131
ldst->oi = oi;
132
ldst->addrlo_reg = addrlo;
133
ldst->addrhi_reg = addrhi;
134
135
- /* We are expecting a_bits to max out at 7, much lower than ANDI. */
136
- tcg_debug_assert(a_bits < 16);
137
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask);
138
+ /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
139
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
140
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
141
+
142
+ /* Extract the TLB index from the address into TMP3. */
143
+ if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
144
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, addrlo,
145
+ s->page_bits - CPU_TLB_ENTRY_BITS);
146
+ } else {
147
+ tcg_out_dsrl(s, TCG_TMP3, addrlo,
148
+ s->page_bits - CPU_TLB_ENTRY_BITS);
149
+ }
150
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
151
+
152
+ /* Add the tlb_table pointer, creating the CPUTLBEntry address. */
153
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
154
+
155
+ if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
156
+ /* Load the (low half) tlb comparator. */
157
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_TMP3,
158
+ cmp_off + HOST_BIG_ENDIAN * 4);
159
+ } else {
160
+ tcg_out_ld(s, TCG_TYPE_I64, TCG_TMP0, TCG_TMP3, cmp_off);
161
+ }
162
+
163
+ if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
164
+ /* Load the tlb addend for the fast path. */
165
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
166
+ }
167
+
168
+ /*
169
+ * Mask the page bits, keeping the alignment bits to compare against.
170
+ * For unaligned accesses, compare against the end of the access to
171
+ * verify that it does not cross a page boundary.
172
+ */
173
+ tcg_out_movi(s, addr_type, TCG_TMP1, s->page_mask | a_mask);
174
+ if (a_mask < s_mask) {
175
+ tcg_out_opc_imm(s, (TCG_TARGET_REG_BITS == 32
176
+ || addr_type == TCG_TYPE_I32
177
+ ? OPC_ADDIU : OPC_DADDIU),
178
+ TCG_TMP2, addrlo, s_mask - a_mask);
179
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
180
+ } else {
181
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
182
+ }
183
+
184
+ /* Zero extend a 32-bit guest address for a 64-bit host. */
185
+ if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
186
+ tcg_out_ext32u(s, TCG_TMP2, addrlo);
187
+ addrlo = TCG_TMP2;
188
+ }
189
190
ldst->label_ptr[0] = s->code_ptr;
191
- if (use_mips32r6_instructions) {
192
- tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0);
193
- } else {
194
- tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO);
195
- tcg_out_nop(s);
196
- }
197
- }
198
+ tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
199
200
- base = addrlo;
201
- if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
202
- tcg_out_ext32u(s, TCG_REG_A0, base);
203
- base = TCG_REG_A0;
204
- }
205
- if (guest_base) {
206
- if (guest_base == (int16_t)guest_base) {
207
- tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
208
- } else {
209
- tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
210
- TCG_GUEST_BASE_REG);
211
+ /* Load and test the high half tlb comparator. */
212
+ if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
213
+ /* delay slot */
214
+ tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
215
+
216
+ /* Load the tlb addend for the fast path. */
217
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
218
+
219
+ ldst->label_ptr[1] = s->code_ptr;
220
+ tcg_out_opc_br(s, OPC_BNE, addrhi, TCG_TMP0);
221
+ }
222
+
223
+ /* delay slot */
224
+ base = TCG_TMP3;
225
+ tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP3, addrlo);
226
+ } else {
227
+ if (a_mask && (use_mips32r6_instructions || a_bits != s_bits)) {
228
+ ldst = new_ldst_label(s);
229
+
230
+ ldst->is_ld = is_ld;
231
+ ldst->oi = oi;
232
+ ldst->addrlo_reg = addrlo;
233
+ ldst->addrhi_reg = addrhi;
234
+
235
+ /* We are expecting a_bits to max out at 7, much lower than ANDI. */
236
+ tcg_debug_assert(a_bits < 16);
237
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask);
238
+
239
+ ldst->label_ptr[0] = s->code_ptr;
240
+ if (use_mips32r6_instructions) {
241
+ tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0);
242
+ } else {
243
+ tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO);
244
+ tcg_out_nop(s);
245
+ }
246
+ }
247
+
248
+ base = addrlo;
249
+ if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
250
+ tcg_out_ext32u(s, TCG_REG_A0, base);
251
+ base = TCG_REG_A0;
252
+ }
253
+ if (guest_base) {
254
+ if (guest_base == (int16_t)guest_base) {
255
+ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
256
+ } else {
257
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
258
+ TCG_GUEST_BASE_REG);
259
+ }
260
+ base = TCG_REG_A0;
261
}
262
- base = TCG_REG_A0;
263
}
264
-#endif
265
266
h->base = base;
267
return ldst;
268
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
269
TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
270
}
271
272
-#ifndef CONFIG_SOFTMMU
273
- if (guest_base != (int16_t)guest_base) {
274
+ if (!tcg_use_softmmu && guest_base != (int16_t)guest_base) {
275
/*
276
* The function call abi for n32 and n64 will have loaded $25 (t9)
277
* with the address of the prologue, so we can use that instead
278
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
279
TCG_TARGET_REG_BITS == 64 ? TCG_REG_T9 : 0);
280
tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
281
}
282
-#endif
283
284
if (TCG_TARGET_REG_BITS == 64) {
285
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
286
--
287
2.34.1
288
289
New patch
1
Fix TCG_GUEST_BASE_REG to use 'TCG_REG_R30' instead of '30'.
1
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/ppc/tcg-target.c.inc | 284 ++++++++++++++++++++-------------------
7
1 file changed, 143 insertions(+), 141 deletions(-)
8
9
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/ppc/tcg-target.c.inc
12
+++ b/tcg/ppc/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@
14
15
#define have_isel (cpuinfo & CPUINFO_ISEL)
16
17
-#ifndef CONFIG_SOFTMMU
18
-#define TCG_GUEST_BASE_REG 30
19
-#endif
20
+#define TCG_GUEST_BASE_REG TCG_REG_R30
21
22
#ifdef CONFIG_DEBUG_TCG
23
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
24
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
25
s_bits == MO_128);
26
a_bits = h->aa.align;
27
28
-#ifdef CONFIG_SOFTMMU
29
- int mem_index = get_mmuidx(oi);
30
- int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
31
- : offsetof(CPUTLBEntry, addr_write);
32
- int fast_off = tlb_mask_table_ofs(s, mem_index);
33
- int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
34
- int table_off = fast_off + offsetof(CPUTLBDescFast, table);
35
+ if (tcg_use_softmmu) {
36
+ int mem_index = get_mmuidx(oi);
37
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
38
+ : offsetof(CPUTLBEntry, addr_write);
39
+ int fast_off = tlb_mask_table_ofs(s, mem_index);
40
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
41
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
42
43
- ldst = new_ldst_label(s);
44
- ldst->is_ld = is_ld;
45
- ldst->oi = oi;
46
- ldst->addrlo_reg = addrlo;
47
- ldst->addrhi_reg = addrhi;
48
-
49
- /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
50
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
51
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
52
-
53
- /* Extract the page index, shifted into place for tlb index. */
54
- if (TCG_TARGET_REG_BITS == 32) {
55
- tcg_out_shri32(s, TCG_REG_R0, addrlo,
56
- s->page_bits - CPU_TLB_ENTRY_BITS);
57
- } else {
58
- tcg_out_shri64(s, TCG_REG_R0, addrlo,
59
- s->page_bits - CPU_TLB_ENTRY_BITS);
60
- }
61
- tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
62
-
63
- /*
64
- * Load the (low part) TLB comparator into TMP2.
65
- * For 64-bit host, always load the entire 64-bit slot for simplicity.
66
- * We will ignore the high bits with tcg_out_cmp(..., addr_type).
67
- */
68
- if (TCG_TARGET_REG_BITS == 64) {
69
- if (cmp_off == 0) {
70
- tcg_out32(s, LDUX | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
71
- } else {
72
- tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
73
- tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off);
74
- }
75
- } else if (cmp_off == 0 && !HOST_BIG_ENDIAN) {
76
- tcg_out32(s, LWZUX | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
77
- } else {
78
- tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
79
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
80
- cmp_off + 4 * HOST_BIG_ENDIAN);
81
- }
82
-
83
- /*
84
- * Load the TLB addend for use on the fast path.
85
- * Do this asap to minimize any load use delay.
86
- */
87
- if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
88
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
89
- offsetof(CPUTLBEntry, addend));
90
- }
91
-
92
- /* Clear the non-page, non-alignment bits from the address in R0. */
93
- if (TCG_TARGET_REG_BITS == 32) {
94
- /*
95
- * We don't support unaligned accesses on 32-bits.
96
- * Preserve the bottom bits and thus trigger a comparison
97
- * failure on unaligned accesses.
98
- */
99
- if (a_bits < s_bits) {
100
- a_bits = s_bits;
101
- }
102
- tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
103
- (32 - a_bits) & 31, 31 - s->page_bits);
104
- } else {
105
- TCGReg t = addrlo;
106
-
107
- /*
108
- * If the access is unaligned, we need to make sure we fail if we
109
- * cross a page boundary. The trick is to add the access size-1
110
- * to the address before masking the low bits. That will make the
111
- * address overflow to the next page if we cross a page boundary,
112
- * which will then force a mismatch of the TLB compare.
113
- */
114
- if (a_bits < s_bits) {
115
- unsigned a_mask = (1 << a_bits) - 1;
116
- unsigned s_mask = (1 << s_bits) - 1;
117
- tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
118
- t = TCG_REG_R0;
119
- }
120
-
121
- /* Mask the address for the requested alignment. */
122
- if (addr_type == TCG_TYPE_I32) {
123
- tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
124
- (32 - a_bits) & 31, 31 - s->page_bits);
125
- } else if (a_bits == 0) {
126
- tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
127
- } else {
128
- tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
129
- 64 - s->page_bits, s->page_bits - a_bits);
130
- tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
131
- }
132
- }
133
-
134
- if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
135
- /* Low part comparison into cr7. */
136
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
137
- 0, 7, TCG_TYPE_I32);
138
-
139
- /* Load the high part TLB comparator into TMP2. */
140
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
141
- cmp_off + 4 * !HOST_BIG_ENDIAN);
142
-
143
- /* Load addend, deferred for this case. */
144
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
145
- offsetof(CPUTLBEntry, addend));
146
-
147
- /* High part comparison into cr6. */
148
- tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2, 0, 6, TCG_TYPE_I32);
149
-
150
- /* Combine comparisons into cr7. */
151
- tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
152
- } else {
153
- /* Full comparison into cr7. */
154
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 0, 7, addr_type);
155
- }
156
-
157
- /* Load a pointer into the current opcode w/conditional branch-link. */
158
- ldst->label_ptr[0] = s->code_ptr;
159
- tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
160
-
161
- h->base = TCG_REG_TMP1;
162
-#else
163
- if (a_bits) {
164
ldst = new_ldst_label(s);
165
ldst->is_ld = is_ld;
166
ldst->oi = oi;
167
ldst->addrlo_reg = addrlo;
168
ldst->addrhi_reg = addrhi;
169
170
- /* We are expecting a_bits to max out at 7, much lower than ANDI. */
171
- tcg_debug_assert(a_bits < 16);
172
- tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1));
173
+ /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
174
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
175
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
176
177
+ /* Extract the page index, shifted into place for tlb index. */
178
+ if (TCG_TARGET_REG_BITS == 32) {
179
+ tcg_out_shri32(s, TCG_REG_R0, addrlo,
180
+ s->page_bits - CPU_TLB_ENTRY_BITS);
181
+ } else {
182
+ tcg_out_shri64(s, TCG_REG_R0, addrlo,
183
+ s->page_bits - CPU_TLB_ENTRY_BITS);
184
+ }
185
+ tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
186
+
187
+ /*
188
+ * Load the (low part) TLB comparator into TMP2.
189
+ * For 64-bit host, always load the entire 64-bit slot for simplicity.
190
+ * We will ignore the high bits with tcg_out_cmp(..., addr_type).
191
+ */
192
+ if (TCG_TARGET_REG_BITS == 64) {
193
+ if (cmp_off == 0) {
194
+ tcg_out32(s, LDUX | TAB(TCG_REG_TMP2,
195
+ TCG_REG_TMP1, TCG_REG_TMP2));
196
+ } else {
197
+ tcg_out32(s, ADD | TAB(TCG_REG_TMP1,
198
+ TCG_REG_TMP1, TCG_REG_TMP2));
199
+ tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_TMP2,
200
+ TCG_REG_TMP1, cmp_off);
201
+ }
202
+ } else if (cmp_off == 0 && !HOST_BIG_ENDIAN) {
203
+ tcg_out32(s, LWZUX | TAB(TCG_REG_TMP2,
204
+ TCG_REG_TMP1, TCG_REG_TMP2));
205
+ } else {
206
+ tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
207
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
208
+ cmp_off + 4 * HOST_BIG_ENDIAN);
209
+ }
210
+
211
+ /*
212
+ * Load the TLB addend for use on the fast path.
213
+ * Do this asap to minimize any load use delay.
214
+ */
215
+ if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
216
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
217
+ offsetof(CPUTLBEntry, addend));
218
+ }
219
+
220
+ /* Clear the non-page, non-alignment bits from the address in R0. */
221
+ if (TCG_TARGET_REG_BITS == 32) {
222
+ /*
223
+ * We don't support unaligned accesses on 32-bits.
224
+ * Preserve the bottom bits and thus trigger a comparison
225
+ * failure on unaligned accesses.
226
+ */
227
+ if (a_bits < s_bits) {
228
+ a_bits = s_bits;
229
+ }
230
+ tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
231
+ (32 - a_bits) & 31, 31 - s->page_bits);
232
+ } else {
233
+ TCGReg t = addrlo;
234
+
235
+ /*
236
+ * If the access is unaligned, we need to make sure we fail if we
237
+ * cross a page boundary. The trick is to add the access size-1
238
+ * to the address before masking the low bits. That will make the
239
+ * address overflow to the next page if we cross a page boundary,
240
+ * which will then force a mismatch of the TLB compare.
241
+ */
242
+ if (a_bits < s_bits) {
243
+ unsigned a_mask = (1 << a_bits) - 1;
244
+ unsigned s_mask = (1 << s_bits) - 1;
245
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
246
+ t = TCG_REG_R0;
247
+ }
248
+
249
+ /* Mask the address for the requested alignment. */
250
+ if (addr_type == TCG_TYPE_I32) {
251
+ tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
252
+ (32 - a_bits) & 31, 31 - s->page_bits);
253
+ } else if (a_bits == 0) {
254
+ tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
255
+ } else {
256
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
257
+ 64 - s->page_bits, s->page_bits - a_bits);
258
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
259
+ }
260
+ }
261
+
262
+ if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
263
+ /* Low part comparison into cr7. */
264
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
265
+ 0, 7, TCG_TYPE_I32);
266
+
267
+ /* Load the high part TLB comparator into TMP2. */
268
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
269
+ cmp_off + 4 * !HOST_BIG_ENDIAN);
270
+
271
+ /* Load addend, deferred for this case. */
272
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
273
+ offsetof(CPUTLBEntry, addend));
274
+
275
+ /* High part comparison into cr6. */
276
+ tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2,
277
+ 0, 6, TCG_TYPE_I32);
278
+
279
+ /* Combine comparisons into cr7. */
280
+ tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
281
+ } else {
282
+ /* Full comparison into cr7. */
283
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
284
+ 0, 7, addr_type);
285
+ }
286
+
287
+ /* Load a pointer into the current opcode w/conditional branch-link. */
288
ldst->label_ptr[0] = s->code_ptr;
289
- tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
290
- }
291
+ tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
292
293
- h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
294
-#endif
295
+ h->base = TCG_REG_TMP1;
296
+ } else {
297
+ if (a_bits) {
298
+ ldst = new_ldst_label(s);
299
+ ldst->is_ld = is_ld;
300
+ ldst->oi = oi;
301
+ ldst->addrlo_reg = addrlo;
302
+ ldst->addrhi_reg = addrhi;
303
+
304
+ /* We are expecting a_bits to max out at 7, much lower than ANDI. */
305
+ tcg_debug_assert(a_bits < 16);
306
+ tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1));
307
+
308
+ ldst->label_ptr[0] = s->code_ptr;
309
+ tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
310
+ }
311
+
312
+ h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
313
+ }
314
315
if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
316
/* Zero-extend the guest address for use in the host address. */
317
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
318
}
319
tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
320
321
-#ifndef CONFIG_SOFTMMU
322
- if (guest_base) {
323
+ if (!tcg_use_softmmu && guest_base) {
324
tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
325
tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
326
}
327
-#endif
328
329
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
330
tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
331
--
332
2.34.1
333
334
New patch
1
Fixes: 92c041c59b ("tcg/riscv: Add the prologue generation and register the JIT")
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/riscv/tcg-target.c.inc | 6 ++++--
5
1 file changed, 4 insertions(+), 2 deletions(-)
1
6
7
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/riscv/tcg-target.c.inc
10
+++ b/tcg/riscv/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
12
}
13
14
#if !defined(CONFIG_SOFTMMU)
15
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
16
- tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
17
+ if (guest_base) {
18
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
19
+ tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
20
+ }
21
#endif
22
23
/* Call generated code */
24
--
25
2.34.1
New patch
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
---
3
tcg/riscv/tcg-target.c.inc | 185 +++++++++++++++++++------------------
4
1 file changed, 94 insertions(+), 91 deletions(-)
1
5
6
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
7
index XXXXXXX..XXXXXXX 100644
8
--- a/tcg/riscv/tcg-target.c.inc
9
+++ b/tcg/riscv/tcg-target.c.inc
10
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
11
aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
12
a_mask = (1u << aa.align) - 1;
13
14
-#ifdef CONFIG_SOFTMMU
15
- unsigned s_bits = opc & MO_SIZE;
16
- unsigned s_mask = (1u << s_bits) - 1;
17
- int mem_index = get_mmuidx(oi);
18
- int fast_ofs = tlb_mask_table_ofs(s, mem_index);
19
- int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
20
- int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
21
- int compare_mask;
22
- TCGReg addr_adj;
23
+ if (tcg_use_softmmu) {
24
+ unsigned s_bits = opc & MO_SIZE;
25
+ unsigned s_mask = (1u << s_bits) - 1;
26
+ int mem_index = get_mmuidx(oi);
27
+ int fast_ofs = tlb_mask_table_ofs(s, mem_index);
28
+ int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
29
+ int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
30
+ int compare_mask;
31
+ TCGReg addr_adj;
32
33
- ldst = new_ldst_label(s);
34
- ldst->is_ld = is_ld;
35
- ldst->oi = oi;
36
- ldst->addrlo_reg = addr_reg;
37
-
38
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
39
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
40
-
41
- tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
42
- s->page_bits - CPU_TLB_ENTRY_BITS);
43
- tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
44
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
45
-
46
- /*
47
- * For aligned accesses, we check the first byte and include the alignment
48
- * bits within the address. For unaligned access, we check that we don't
49
- * cross pages using the address of the last byte of the access.
50
- */
51
- addr_adj = addr_reg;
52
- if (a_mask < s_mask) {
53
- addr_adj = TCG_REG_TMP0;
54
- tcg_out_opc_imm(s, addr_type == TCG_TYPE_I32 ? OPC_ADDIW : OPC_ADDI,
55
- addr_adj, addr_reg, s_mask - a_mask);
56
- }
57
- compare_mask = s->page_mask | a_mask;
58
- if (compare_mask == sextreg(compare_mask, 0, 12)) {
59
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
60
- } else {
61
- tcg_out_movi(s, addr_type, TCG_REG_TMP1, compare_mask);
62
- tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_adj);
63
- }
64
-
65
- /* Load the tlb comparator and the addend. */
66
- QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
67
- tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
68
- is_ld ? offsetof(CPUTLBEntry, addr_read)
69
- : offsetof(CPUTLBEntry, addr_write));
70
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
71
- offsetof(CPUTLBEntry, addend));
72
-
73
- /* Compare masked address with the TLB entry. */
74
- ldst->label_ptr[0] = s->code_ptr;
75
- tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
76
-
77
- /* TLB Hit - translate address using addend. */
78
- if (addr_type != TCG_TYPE_I32) {
79
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, addr_reg, TCG_REG_TMP2);
80
- } else if (have_zba) {
81
- tcg_out_opc_reg(s, OPC_ADD_UW, TCG_REG_TMP0, addr_reg, TCG_REG_TMP2);
82
- } else {
83
- tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
84
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, TCG_REG_TMP2);
85
- }
86
- *pbase = TCG_REG_TMP0;
87
-#else
88
- TCGReg base;
89
-
90
- if (a_mask) {
91
ldst = new_ldst_label(s);
92
ldst->is_ld = is_ld;
93
ldst->oi = oi;
94
ldst->addrlo_reg = addr_reg;
95
96
- /* We are expecting alignment max 7, so we can always use andi. */
97
- tcg_debug_assert(a_mask == sextreg(a_mask, 0, 12));
98
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
99
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
100
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
101
102
- ldst->label_ptr[0] = s->code_ptr;
103
- tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
104
- }
105
+ tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
106
+ s->page_bits - CPU_TLB_ENTRY_BITS);
107
+ tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
108
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
109
110
- if (guest_base != 0) {
111
- base = TCG_REG_TMP0;
112
- if (addr_type != TCG_TYPE_I32) {
113
- tcg_out_opc_reg(s, OPC_ADD, base, addr_reg, TCG_GUEST_BASE_REG);
114
- } else if (have_zba) {
115
- tcg_out_opc_reg(s, OPC_ADD_UW, base, addr_reg, TCG_GUEST_BASE_REG);
116
- } else {
117
- tcg_out_ext32u(s, base, addr_reg);
118
- tcg_out_opc_reg(s, OPC_ADD, base, base, TCG_GUEST_BASE_REG);
119
+ /*
120
+ * For aligned accesses, we check the first byte and include the
121
+ * alignment bits within the address. For unaligned access, we
122
+ * check that we don't cross pages using the address of the last
123
+ * byte of the access.
124
+ */
125
+ addr_adj = addr_reg;
126
+ if (a_mask < s_mask) {
127
+ addr_adj = TCG_REG_TMP0;
128
+ tcg_out_opc_imm(s, addr_type == TCG_TYPE_I32 ? OPC_ADDIW : OPC_ADDI,
129
+ addr_adj, addr_reg, s_mask - a_mask);
130
}
131
- } else if (addr_type != TCG_TYPE_I32) {
132
- base = addr_reg;
133
+ compare_mask = s->page_mask | a_mask;
134
+ if (compare_mask == sextreg(compare_mask, 0, 12)) {
135
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
136
+ } else {
137
+ tcg_out_movi(s, addr_type, TCG_REG_TMP1, compare_mask);
138
+ tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_adj);
139
+ }
140
+
141
+ /* Load the tlb comparator and the addend. */
142
+ QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
143
+ tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
144
+ is_ld ? offsetof(CPUTLBEntry, addr_read)
145
+ : offsetof(CPUTLBEntry, addr_write));
146
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
147
+ offsetof(CPUTLBEntry, addend));
148
+
149
+ /* Compare masked address with the TLB entry. */
150
+ ldst->label_ptr[0] = s->code_ptr;
151
+ tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
152
+
153
+ /* TLB Hit - translate address using addend. */
154
+ if (addr_type != TCG_TYPE_I32) {
155
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, addr_reg, TCG_REG_TMP2);
156
+ } else if (have_zba) {
157
+ tcg_out_opc_reg(s, OPC_ADD_UW, TCG_REG_TMP0,
158
+ addr_reg, TCG_REG_TMP2);
159
+ } else {
160
+ tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
161
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0,
162
+ TCG_REG_TMP0, TCG_REG_TMP2);
163
+ }
164
+ *pbase = TCG_REG_TMP0;
165
} else {
166
- base = TCG_REG_TMP0;
167
- tcg_out_ext32u(s, base, addr_reg);
168
+ TCGReg base;
169
+
170
+ if (a_mask) {
171
+ ldst = new_ldst_label(s);
172
+ ldst->is_ld = is_ld;
173
+ ldst->oi = oi;
174
+ ldst->addrlo_reg = addr_reg;
175
+
176
+ /* We are expecting alignment max 7, so we can always use andi. */
177
+ tcg_debug_assert(a_mask == sextreg(a_mask, 0, 12));
178
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
179
+
180
+ ldst->label_ptr[0] = s->code_ptr;
181
+ tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
182
+ }
183
+
184
+ if (guest_base != 0) {
185
+ base = TCG_REG_TMP0;
186
+ if (addr_type != TCG_TYPE_I32) {
187
+ tcg_out_opc_reg(s, OPC_ADD, base, addr_reg,
188
+ TCG_GUEST_BASE_REG);
189
+ } else if (have_zba) {
190
+ tcg_out_opc_reg(s, OPC_ADD_UW, base, addr_reg,
191
+ TCG_GUEST_BASE_REG);
192
+ } else {
193
+ tcg_out_ext32u(s, base, addr_reg);
194
+ tcg_out_opc_reg(s, OPC_ADD, base, base, TCG_GUEST_BASE_REG);
195
+ }
196
+ } else if (addr_type != TCG_TYPE_I32) {
197
+ base = addr_reg;
198
+ } else {
199
+ base = TCG_REG_TMP0;
200
+ tcg_out_ext32u(s, base, addr_reg);
201
+ }
202
+ *pbase = base;
203
}
204
- *pbase = base;
205
-#endif
206
207
return ldst;
208
}
209
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
210
TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
211
}
212
213
-#if !defined(CONFIG_SOFTMMU)
214
- if (guest_base) {
215
+ if (!tcg_use_softmmu && guest_base) {
216
tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
217
tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
218
}
219
-#endif
220
221
/* Call generated code */
222
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
223
--
224
2.34.1
New patch
1
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/s390x/tcg-target.c.inc | 161 ++++++++++++++++++-------------------
5
1 file changed, 79 insertions(+), 82 deletions(-)
1
6
7
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/s390x/tcg-target.c.inc
10
+++ b/tcg/s390x/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@
12
/* A scratch register that may be be used throughout the backend. */
13
#define TCG_TMP0 TCG_REG_R1
14
15
-#ifndef CONFIG_SOFTMMU
16
#define TCG_GUEST_BASE_REG TCG_REG_R13
17
-#endif
18
19
/* All of the following instructions are prefixed with their instruction
20
format, and are defined as 8- or 16-bit quantities, even when the two
21
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
22
h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
23
a_mask = (1 << h->aa.align) - 1;
24
25
-#ifdef CONFIG_SOFTMMU
26
- unsigned s_mask = (1 << s_bits) - 1;
27
- int mem_index = get_mmuidx(oi);
28
- int fast_off = tlb_mask_table_ofs(s, mem_index);
29
- int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
30
- int table_off = fast_off + offsetof(CPUTLBDescFast, table);
31
- int ofs, a_off;
32
- uint64_t tlb_mask;
33
+ if (tcg_use_softmmu) {
34
+ unsigned s_mask = (1 << s_bits) - 1;
35
+ int mem_index = get_mmuidx(oi);
36
+ int fast_off = tlb_mask_table_ofs(s, mem_index);
37
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
38
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
39
+ int ofs, a_off;
40
+ uint64_t tlb_mask;
41
42
- ldst = new_ldst_label(s);
43
- ldst->is_ld = is_ld;
44
- ldst->oi = oi;
45
- ldst->addrlo_reg = addr_reg;
46
-
47
- tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
48
- s->page_bits - CPU_TLB_ENTRY_BITS);
49
-
50
- tcg_out_insn(s, RXY, NG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, mask_off);
51
- tcg_out_insn(s, RXY, AG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, table_off);
52
-
53
- /*
54
- * For aligned accesses, we check the first byte and include the alignment
55
- * bits within the address. For unaligned access, we check that we don't
56
- * cross pages using the address of the last byte of the access.
57
- */
58
- a_off = (a_mask >= s_mask ? 0 : s_mask - a_mask);
59
- tlb_mask = (uint64_t)s->page_mask | a_mask;
60
- if (a_off == 0) {
61
- tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
62
- } else {
63
- tcg_out_insn(s, RX, LA, TCG_REG_R0, addr_reg, TCG_REG_NONE, a_off);
64
- tgen_andi(s, addr_type, TCG_REG_R0, tlb_mask);
65
- }
66
-
67
- if (is_ld) {
68
- ofs = offsetof(CPUTLBEntry, addr_read);
69
- } else {
70
- ofs = offsetof(CPUTLBEntry, addr_write);
71
- }
72
- if (addr_type == TCG_TYPE_I32) {
73
- ofs += HOST_BIG_ENDIAN * 4;
74
- tcg_out_insn(s, RX, C, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
75
- } else {
76
- tcg_out_insn(s, RXY, CG, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
77
- }
78
-
79
- tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
80
- ldst->label_ptr[0] = s->code_ptr++;
81
-
82
- h->index = TCG_TMP0;
83
- tcg_out_insn(s, RXY, LG, h->index, TCG_TMP0, TCG_REG_NONE,
84
- offsetof(CPUTLBEntry, addend));
85
-
86
- if (addr_type == TCG_TYPE_I32) {
87
- tcg_out_insn(s, RRE, ALGFR, h->index, addr_reg);
88
- h->base = TCG_REG_NONE;
89
- } else {
90
- h->base = addr_reg;
91
- }
92
- h->disp = 0;
93
-#else
94
- if (a_mask) {
95
ldst = new_ldst_label(s);
96
ldst->is_ld = is_ld;
97
ldst->oi = oi;
98
ldst->addrlo_reg = addr_reg;
99
100
- /* We are expecting a_bits to max out at 7, much lower than TMLL. */
101
- tcg_debug_assert(a_mask <= 0xffff);
102
- tcg_out_insn(s, RI, TMLL, addr_reg, a_mask);
103
+ tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
104
+ s->page_bits - CPU_TLB_ENTRY_BITS);
105
106
- tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
107
+ tcg_out_insn(s, RXY, NG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, mask_off);
108
+ tcg_out_insn(s, RXY, AG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, table_off);
109
+
110
+ /*
111
+ * For aligned accesses, we check the first byte and include the
112
+ * alignment bits within the address. For unaligned access, we
113
+ * check that we don't cross pages using the address of the last
114
+ * byte of the access.
115
+ */
116
+ a_off = (a_mask >= s_mask ? 0 : s_mask - a_mask);
117
+ tlb_mask = (uint64_t)s->page_mask | a_mask;
118
+ if (a_off == 0) {
119
+ tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
120
+ } else {
121
+ tcg_out_insn(s, RX, LA, TCG_REG_R0, addr_reg, TCG_REG_NONE, a_off);
122
+ tgen_andi(s, addr_type, TCG_REG_R0, tlb_mask);
123
+ }
124
+
125
+ if (is_ld) {
126
+ ofs = offsetof(CPUTLBEntry, addr_read);
127
+ } else {
128
+ ofs = offsetof(CPUTLBEntry, addr_write);
129
+ }
130
+ if (addr_type == TCG_TYPE_I32) {
131
+ ofs += HOST_BIG_ENDIAN * 4;
132
+ tcg_out_insn(s, RX, C, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
133
+ } else {
134
+ tcg_out_insn(s, RXY, CG, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
135
+ }
136
+
137
+ tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
138
ldst->label_ptr[0] = s->code_ptr++;
139
- }
140
141
- h->base = addr_reg;
142
- if (addr_type == TCG_TYPE_I32) {
143
- tcg_out_ext32u(s, TCG_TMP0, addr_reg);
144
- h->base = TCG_TMP0;
145
- }
146
- if (guest_base < 0x80000) {
147
- h->index = TCG_REG_NONE;
148
- h->disp = guest_base;
149
- } else {
150
- h->index = TCG_GUEST_BASE_REG;
151
+ h->index = TCG_TMP0;
152
+ tcg_out_insn(s, RXY, LG, h->index, TCG_TMP0, TCG_REG_NONE,
153
+ offsetof(CPUTLBEntry, addend));
154
+
155
+ if (addr_type == TCG_TYPE_I32) {
156
+ tcg_out_insn(s, RRE, ALGFR, h->index, addr_reg);
157
+ h->base = TCG_REG_NONE;
158
+ } else {
159
+ h->base = addr_reg;
160
+ }
161
h->disp = 0;
162
+ } else {
163
+ if (a_mask) {
164
+ ldst = new_ldst_label(s);
165
+ ldst->is_ld = is_ld;
166
+ ldst->oi = oi;
167
+ ldst->addrlo_reg = addr_reg;
168
+
169
+ /* We are expecting a_bits to max out at 7, much lower than TMLL. */
170
+ tcg_debug_assert(a_mask <= 0xffff);
171
+ tcg_out_insn(s, RI, TMLL, addr_reg, a_mask);
172
+
173
+ tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
174
+ ldst->label_ptr[0] = s->code_ptr++;
175
+ }
176
+
177
+ h->base = addr_reg;
178
+ if (addr_type == TCG_TYPE_I32) {
179
+ tcg_out_ext32u(s, TCG_TMP0, addr_reg);
180
+ h->base = TCG_TMP0;
181
+ }
182
+ if (guest_base < 0x80000) {
183
+ h->index = TCG_REG_NONE;
184
+ h->disp = guest_base;
185
+ } else {
186
+ h->index = TCG_GUEST_BASE_REG;
187
+ h->disp = 0;
188
+ }
189
}
190
-#endif
191
192
return ldst;
193
}
194
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
195
TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
196
CPU_TEMP_BUF_NLONGS * sizeof(long));
197
198
-#ifndef CONFIG_SOFTMMU
199
- if (guest_base >= 0x80000) {
200
+ if (!tcg_use_softmmu && guest_base >= 0x80000) {
201
tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
202
tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
203
}
204
-#endif
205
206
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
207
208
--
209
2.34.1
210
211
New patch
1
From: Mike Frysinger <vapier@gentoo.org>
1
2
3
Use of the API was removed a while back, but the define wasn't.
4
5
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Message-Id: <20231015010046.16020-1-vapier@gentoo.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
10
include/tcg/tcg-op.h | 2 --
11
1 file changed, 2 deletions(-)
12
13
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/tcg/tcg-op.h
16
+++ b/include/tcg/tcg-op.h
17
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1,
18
typedef TCGv_i32 TCGv;
19
#define tcg_temp_new() tcg_temp_new_i32()
20
#define tcg_global_mem_new tcg_global_mem_new_i32
21
-#define tcg_temp_free tcg_temp_free_i32
22
#define tcgv_tl_temp tcgv_i32_temp
23
#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i32
24
#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i32
25
@@ -XXX,XX +XXX,XX @@ typedef TCGv_i32 TCGv;
26
typedef TCGv_i64 TCGv;
27
#define tcg_temp_new() tcg_temp_new_i64()
28
#define tcg_global_mem_new tcg_global_mem_new_i64
29
-#define tcg_temp_free tcg_temp_free_i64
30
#define tcgv_tl_temp tcgv_i64_temp
31
#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i64
32
#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i64
33
--
34
2.34.1
35
36
New patch
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tcg-op.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2);
} else if (TCG_TARGET_HAS_div2_i32) {
TCGv_i32 t0 = tcg_temp_ebb_new_i32();
- tcg_gen_movi_i32(t0, 0);
- tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2);
+ TCGv_i32 zero = tcg_constant_i32(0);
+ tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, zero, arg2);
tcg_temp_free_i32(t0);
} else {
gen_helper_divu_i32(ret, arg1, arg2);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
tcg_temp_free_i32(t0);
} else if (TCG_TARGET_HAS_div2_i32) {
TCGv_i32 t0 = tcg_temp_ebb_new_i32();
- tcg_gen_movi_i32(t0, 0);
- tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2);
+ TCGv_i32 zero = tcg_constant_i32(0);
+ tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, zero, arg2);
tcg_temp_free_i32(t0);
} else {
gen_helper_remu_i32(ret, arg1, arg2);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2);
} else if (TCG_TARGET_HAS_div2_i64) {
TCGv_i64 t0 = tcg_temp_ebb_new_i64();
- tcg_gen_movi_i64(t0, 0);
- tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2);
+ TCGv_i64 zero = tcg_constant_i64(0);
+ tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, zero, arg2);
tcg_temp_free_i64(t0);
} else {
gen_helper_divu_i64(ret, arg1, arg2);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
tcg_temp_free_i64(t0);
} else if (TCG_TARGET_HAS_div2_i64) {
TCGv_i64 t0 = tcg_temp_ebb_new_i64();
- tcg_gen_movi_i64(t0, 0);
- tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2);
+ TCGv_i64 zero = tcg_constant_i64(0);
+ tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, zero, arg2);
tcg_temp_free_i64(t0);
} else {
gen_helper_remu_i64(ret, arg1, arg2);
--
2.34.1
We already register allocate through extended basic blocks;
optimize through extended basic blocks as well.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
int i, nb_oargs;

/*
- * For an opcode that ends a BB, reset all temp data.
- * We do no cross-BB optimization.
+ * We only optimize extended basic blocks. If the opcode ends a BB
+ * and is not a conditional branch, reset all temp data.
 */
if (def->flags & TCG_OPF_BB_END) {
- memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
ctx->prev_mb = NULL;
+ if (!(def->flags & TCG_OPF_COND_BRANCH)) {
+ memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
+ }
return;
}

--
2.34.1
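As an illustration of what this enables, consider a translator fragment like the following. This is a minimal sketch, not part of the patch; "dest", "src" and "flag" are hypothetical temporaries. The fall-through of the brcond stays within the same extended basic block, so with this change the optimizer can still know that "zero" is constant there and fold the add; only at the label, where control may arrive from elsewhere, is the tracked state dropped as before.

    static void gen_example_ebb(TCGv_i32 dest, TCGv_i32 src, TCGv_i32 flag)
    {
        TCGv_i32 zero = tcg_constant_i32(0);
        TCGLabel *skip = gen_new_label();

        /* Ends the basic block, but not the extended basic block. */
        tcg_gen_brcondi_i32(TCG_COND_NE, flag, 0, skip);
        /* On the fall-through path this can now fold to a simple mov. */
        tcg_gen_add_i32(dest, src, zero);
        gen_set_label(skip);
    }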
Do not require the translators to jump through concat and
extract of i64 in order to move values to and from env.

Tested-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/tcg/tcg-op-common.h | 3 +++
tcg/tcg-op.c | 22 ++++++++++++++++++++++
2 files changed, 25 insertions(+)

diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op-common.h
+++ b/include/tcg/tcg-op-common.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src);
void tcg_gen_extr_i128_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i128 arg);
void tcg_gen_concat_i64_i128(TCGv_i128 ret, TCGv_i64 lo, TCGv_i64 hi);

+void tcg_gen_ld_i128(TCGv_i128 ret, TCGv_ptr base, tcg_target_long offset);
+void tcg_gen_st_i128(TCGv_i128 val, TCGv_ptr base, tcg_target_long offset);
+
static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi)
{
tcg_gen_deposit_i64(ret, lo, hi, 32, 32);
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src)
}
}

+void tcg_gen_ld_i128(TCGv_i128 ret, TCGv_ptr base, tcg_target_long offset)
+{
+ if (HOST_BIG_ENDIAN) {
+ tcg_gen_ld_i64(TCGV128_HIGH(ret), base, offset);
+ tcg_gen_ld_i64(TCGV128_LOW(ret), base, offset + 8);
+ } else {
+ tcg_gen_ld_i64(TCGV128_LOW(ret), base, offset);
+ tcg_gen_ld_i64(TCGV128_HIGH(ret), base, offset + 8);
+ }
+}
+
+void tcg_gen_st_i128(TCGv_i128 val, TCGv_ptr base, tcg_target_long offset)
+{
+ if (HOST_BIG_ENDIAN) {
+ tcg_gen_st_i64(TCGV128_HIGH(val), base, offset);
+ tcg_gen_st_i64(TCGV128_LOW(val), base, offset + 8);
+ } else {
+ tcg_gen_st_i64(TCGV128_LOW(val), base, offset);
+ tcg_gen_st_i64(TCGV128_HIGH(val), base, offset + 8);
+ }
+}
+
/* QEMU specific operations. */

void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx)
--
2.34.1
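For comparison, a hypothetical translator that reads a 128-bit value out of env previously had to assemble it by hand; with the new helper it is one call. This sketch assumes a little-endian host layout for the "before" half, and "vreg_ofs" is a made-up offset used only for illustration.

    TCGv_i128 val = tcg_temp_new_i128();

    /* Before: two i64 loads plus an explicit concat (little-endian host). */
    TCGv_i64 lo = tcg_temp_new_i64();
    TCGv_i64 hi = tcg_temp_new_i64();
    tcg_gen_ld_i64(lo, tcg_env, vreg_ofs);
    tcg_gen_ld_i64(hi, tcg_env, vreg_ofs + 8);
    tcg_gen_concat_i64_i128(val, lo, hi);

    /* After: one call, with host endianness handled internally. */
    tcg_gen_ld_i128(val, tcg_env, vreg_ofs);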
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/translate.c | 63 +++++++++++++++++--------------------
1 file changed, 29 insertions(+), 34 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static inline void gen_stq_env_A0(DisasContext *s, int offset)

static inline void gen_ldo_env_A0(DisasContext *s, int offset, bool align)
{
+ MemOp atom = (s->cpuid_ext_features & CPUID_EXT_AVX
+ ? MO_ATOM_IFALIGN : MO_ATOM_IFALIGN_PAIR);
+ MemOp mop = MO_128 | MO_LE | atom | (align ? MO_ALIGN_16 : 0);
int mem_index = s->mem_index;
- tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index,
- MO_LEUQ | (align ? MO_ALIGN_16 : 0));
- tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(0)));
- tcg_gen_addi_tl(s->tmp0, s->A0, 8);
- tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
- tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(1)));
+ TCGv_i128 t = tcg_temp_new_i128();
+
+ tcg_gen_qemu_ld_i128(t, s->A0, mem_index, mop);
+ tcg_gen_st_i128(t, tcg_env, offset);
}

static inline void gen_sto_env_A0(DisasContext *s, int offset, bool align)
{
+ MemOp atom = (s->cpuid_ext_features & CPUID_EXT_AVX
+ ? MO_ATOM_IFALIGN : MO_ATOM_IFALIGN_PAIR);
+ MemOp mop = MO_128 | MO_LE | atom | (align ? MO_ALIGN_16 : 0);
int mem_index = s->mem_index;
- tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(0)));
- tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index,
- MO_LEUQ | (align ? MO_ALIGN_16 : 0));
- tcg_gen_addi_tl(s->tmp0, s->A0, 8);
- tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(1)));
- tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
+ TCGv_i128 t = tcg_temp_new_i128();
+
+ tcg_gen_ld_i128(t, tcg_env, offset);
+ tcg_gen_qemu_st_i128(t, s->A0, mem_index, mop);
}

static void gen_ldy_env_A0(DisasContext *s, int offset, bool align)
{
+ MemOp mop = MO_128 | MO_LE | MO_ATOM_IFALIGN_PAIR;
int mem_index = s->mem_index;
- tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index,
- MO_LEUQ | (align ? MO_ALIGN_32 : 0));
- tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(0)));
- tcg_gen_addi_tl(s->tmp0, s->A0, 8);
- tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
- tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(1)));
+ TCGv_i128 t0 = tcg_temp_new_i128();
+ TCGv_i128 t1 = tcg_temp_new_i128();

+ tcg_gen_qemu_ld_i128(t0, s->A0, mem_index, mop | (align ? MO_ALIGN_32 : 0));
tcg_gen_addi_tl(s->tmp0, s->A0, 16);
- tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
- tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(2)));
- tcg_gen_addi_tl(s->tmp0, s->A0, 24);
- tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
- tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(3)));
+ tcg_gen_qemu_ld_i128(t1, s->tmp0, mem_index, mop);
+
+ tcg_gen_st_i128(t0, tcg_env, offset + offsetof(YMMReg, YMM_X(0)));
+ tcg_gen_st_i128(t1, tcg_env, offset + offsetof(YMMReg, YMM_X(1)));
}

static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
{
+ MemOp mop = MO_128 | MO_LE | MO_ATOM_IFALIGN_PAIR;
int mem_index = s->mem_index;
- tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(0)));
- tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index,
- MO_LEUQ | (align ? MO_ALIGN_32 : 0));
- tcg_gen_addi_tl(s->tmp0, s->A0, 8);
- tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(1)));
- tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
+ TCGv_i128 t = tcg_temp_new_i128();
+
+ tcg_gen_ld_i128(t, tcg_env, offset + offsetof(YMMReg, YMM_X(0)));
+ tcg_gen_qemu_st_i128(t, s->A0, mem_index, mop | (align ? MO_ALIGN_32 : 0));
tcg_gen_addi_tl(s->tmp0, s->A0, 16);
- tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(2)));
- tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
- tcg_gen_addi_tl(s->tmp0, s->A0, 24);
- tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(3)));
- tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
+ tcg_gen_ld_i128(t, tcg_env, offset + offsetof(YMMReg, YMM_X(1)));
+ tcg_gen_qemu_st_i128(t, s->tmp0, mem_index, mop);
}

#include "decode-new.h"
--
2.34.1
From: Paolo Bonzini <pbonzini@redhat.com>

This can be useful to write a shift bit extraction that does not
depend on TARGET_LONG_BITS.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20231019104648.389942-15-pbonzini@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/tcg/tcg-op-common.h | 4 ++++
include/tcg/tcg-op.h | 2 ++
tcg/tcg-op.c | 12 ++++++++++++
3 files changed, 18 insertions(+)

diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op-common.h
+++ b/include/tcg/tcg-op-common.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
TCGv_i32 arg1, int32_t arg2);
void tcg_gen_negsetcond_i32(TCGCond cond, TCGv_i32 ret,
TCGv_i32 arg1, TCGv_i32 arg2);
+void tcg_gen_negsetcondi_i32(TCGCond cond, TCGv_i32 ret,
+ TCGv_i32 arg1, int32_t arg2);
void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1,
TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2);
void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
@@ -XXX,XX +XXX,XX @@ void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
TCGv_i64 arg1, int64_t arg2);
void tcg_gen_negsetcond_i64(TCGCond cond, TCGv_i64 ret,
TCGv_i64 arg1, TCGv_i64 arg2);
+void tcg_gen_negsetcondi_i64(TCGCond cond, TCGv_i64 ret,
+ TCGv_i64 arg1, int64_t arg2);
void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2);
void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -XXX,XX +XXX,XX @@ DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64)
#define tcg_gen_setcond_tl tcg_gen_setcond_i64
#define tcg_gen_setcondi_tl tcg_gen_setcondi_i64
#define tcg_gen_negsetcond_tl tcg_gen_negsetcond_i64
+#define tcg_gen_negsetcondi_tl tcg_gen_negsetcondi_i64
#define tcg_gen_mul_tl tcg_gen_mul_i64
#define tcg_gen_muli_tl tcg_gen_muli_i64
#define tcg_gen_div_tl tcg_gen_div_i64
@@ -XXX,XX +XXX,XX @@ DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64)
#define tcg_gen_setcond_tl tcg_gen_setcond_i32
#define tcg_gen_setcondi_tl tcg_gen_setcondi_i32
#define tcg_gen_negsetcond_tl tcg_gen_negsetcond_i32
+#define tcg_gen_negsetcondi_tl tcg_gen_negsetcondi_i32
#define tcg_gen_mul_tl tcg_gen_mul_i32
#define tcg_gen_muli_tl tcg_gen_muli_i32
#define tcg_gen_div_tl tcg_gen_div_i32
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_negsetcond_i32(TCGCond cond, TCGv_i32 ret,
}
}

+void tcg_gen_negsetcondi_i32(TCGCond cond, TCGv_i32 ret,
+ TCGv_i32 arg1, int32_t arg2)
+{
+ tcg_gen_negsetcond_i32(cond, ret, arg1, tcg_constant_i32(arg2));
+}
+
void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
if (arg2 == 0) {
@@ -XXX,XX +XXX,XX @@ void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
}
}

+void tcg_gen_negsetcondi_i64(TCGCond cond, TCGv_i64 ret,
+ TCGv_i64 arg1, int64_t arg2)
+{
+ tcg_gen_negsetcond_i64(cond, ret, arg1, tcg_constant_i64(arg2));
+}
+
void tcg_gen_negsetcond_i64(TCGCond cond, TCGv_i64 ret,
TCGv_i64 arg1, TCGv_i64 arg2)
{
--
2.34.1
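A typical use, sketched with made-up temporaries "dest" and "src": build an all-ones mask when a value is negative, without spelling out whether target_long is 32 or 64 bits.

    TCGv mask = tcg_temp_new();

    /* mask = (src < 0) ? -1 : 0 */
    tcg_gen_negsetcondi_tl(TCG_COND_LT, mask, src, 0);
    /* keep dest only when src was negative */
    tcg_gen_and_tl(dest, dest, mask);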
The two concrete type functions already existed, merely needing
a bit of hardening against invalid inputs.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/tcg/tcg-op-common.h | 2 ++
include/tcg/tcg-op.h | 2 ++
tcg/tcg-op-ldst.c | 14 ++++++++++----
3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op-common.h
+++ b/include/tcg/tcg-op-common.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg);
void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg);
void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg);
void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc);
void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags);
void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg);
void tcg_gen_hswap_i32(TCGv_i32 ret, TCGv_i32 arg);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg);
void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg);
void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg);
void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc);
void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags);
void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags);
void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg);
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -XXX,XX +XXX,XX @@ DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64)
#define tcg_gen_ext16s_tl tcg_gen_ext16s_i64
#define tcg_gen_ext32u_tl tcg_gen_ext32u_i64
#define tcg_gen_ext32s_tl tcg_gen_ext32s_i64
+#define tcg_gen_ext_tl tcg_gen_ext_i64
#define tcg_gen_bswap16_tl tcg_gen_bswap16_i64
#define tcg_gen_bswap32_tl tcg_gen_bswap32_i64
#define tcg_gen_bswap64_tl tcg_gen_bswap64_i64
@@ -XXX,XX +XXX,XX @@ DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64)
#define tcg_gen_ext16s_tl tcg_gen_ext16s_i32
#define tcg_gen_ext32u_tl tcg_gen_mov_i32
#define tcg_gen_ext32s_tl tcg_gen_mov_i32
+#define tcg_gen_ext_tl tcg_gen_ext_i32
#define tcg_gen_bswap16_tl tcg_gen_bswap16_i32
#define tcg_gen_bswap32_tl(D, S, F) tcg_gen_bswap32_i32(D, S)
#define tcg_gen_bswap_tl tcg_gen_bswap32_i32
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-ldst.c
+++ b/tcg/tcg-op-ldst.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
tcg_gen_qemu_st_i128_int(val, addr, idx, memop);
}

-static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
+void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
{
switch (opc & MO_SSIZE) {
case MO_SB:
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
case MO_UW:
tcg_gen_ext16u_i32(ret, val);
break;
- default:
+ case MO_UL:
+ case MO_SL:
tcg_gen_mov_i32(ret, val);
break;
+ default:
+ g_assert_not_reached();
}
}

-static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
+void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
{
switch (opc & MO_SSIZE) {
case MO_SB:
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
case MO_UL:
tcg_gen_ext32u_i64(ret, val);
break;
- default:
+ case MO_UQ:
+ case MO_SQ:
tcg_gen_mov_i64(ret, val);
break;
+ default:
+ g_assert_not_reached();
}
}

--
2.34.1
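A sketch of the intended use in a translator (names are illustrative only, not part of the patch): pick the extension from a MemOp that is already at hand instead of open-coding a switch over tcg_gen_ext8s/16s/32s and friends.

    /* Sign- or zero-extend "src" into "dst" as encoded in memop. */
    static void gen_extend_for(TCGv dst, TCGv src, MemOp memop)
    {
        tcg_gen_ext_tl(dst, src, memop & MO_SSIZE);
    }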
From: Paolo Bonzini <pbonzini@redhat.com>

This will also come in handy later for "less than" comparisons.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <03ba02fd-fade-4409-be16-2f81a5690b4c@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/target_long.h | 2 ++
1 file changed, 2 insertions(+)

diff --git a/include/exec/target_long.h b/include/exec/target_long.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/target_long.h
+++ b/include/exec/target_long.h
@@ -XXX,XX +XXX,XX @@ typedef uint32_t target_ulong;
#define TARGET_FMT_lx "%08x"
#define TARGET_FMT_ld "%d"
#define TARGET_FMT_lu "%u"
+#define MO_TL MO_32
#elif TARGET_LONG_SIZE == 8
typedef int64_t target_long;
typedef uint64_t target_ulong;
#define TARGET_FMT_lx "%016" PRIx64
#define TARGET_FMT_ld "%" PRId64
#define TARGET_FMT_lu "%" PRIu64
+#define MO_TL MO_64
#else
#error TARGET_LONG_SIZE undefined
#endif
--
2.34.1
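The later patches in this series use the new define along these lines (a minimal sketch mirroring the i386 conversion below; "dst", "src", "size" and "sign" are illustrative): a full-width operand needs no extension, and the same test reads correctly for 32-bit and 64-bit targets.

    if (size == MO_TL) {
        return src;                 /* already guest-register width */
    }
    tcg_gen_ext_tl(dst, src, size | (sign ? MO_SIGN : 0));
    return dst;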
The ext_and_shift_reg helper does this plus a shift.
The non-zero check for the shift count duplicates
the one done within tcg_gen_shli_i64.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/translate-a64.c | 37 ++--------------------------------
1 file changed, 2 insertions(+), 35 deletions(-)

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
int extsize = extract32(option, 0, 2);
bool is_signed = extract32(option, 2, 1);

- if (is_signed) {
- switch (extsize) {
- case 0:
- tcg_gen_ext8s_i64(tcg_out, tcg_in);
- break;
- case 1:
- tcg_gen_ext16s_i64(tcg_out, tcg_in);
- break;
- case 2:
- tcg_gen_ext32s_i64(tcg_out, tcg_in);
- break;
- case 3:
- tcg_gen_mov_i64(tcg_out, tcg_in);
- break;
- }
- } else {
- switch (extsize) {
- case 0:
- tcg_gen_ext8u_i64(tcg_out, tcg_in);
- break;
- case 1:
- tcg_gen_ext16u_i64(tcg_out, tcg_in);
- break;
- case 2:
- tcg_gen_ext32u_i64(tcg_out, tcg_in);
- break;
- case 3:
- tcg_gen_mov_i64(tcg_out, tcg_in);
- break;
- }
- }
-
- if (shift) {
- tcg_gen_shli_i64(tcg_out, tcg_out, shift);
- }
+ tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
+ tcg_gen_shli_i64(tcg_out, tcg_out, shift);
}

static inline void gen_check_sp_alignment(DisasContext *s)
--
2.34.1
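The reasoning in code form (illustrative only; "tmp" and "src" are made-up temporaries): tcg_gen_shli_i64 already degenerates to a plain move for a zero count, so dropping the caller's if (shift) guard emits the same ops.

    /* A shift count of 0 is emitted as a simple mov, no guard needed. */
    tcg_gen_shli_i64(tmp, src, 0);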
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/translate.c | 28 +++-------------------------
1 file changed, 3 insertions(+), 25 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)

static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
{
- switch (size) {
- case MO_8:
- if (sign) {
- tcg_gen_ext8s_tl(dst, src);
- } else {
- tcg_gen_ext8u_tl(dst, src);
- }
- return dst;
- case MO_16:
- if (sign) {
- tcg_gen_ext16s_tl(dst, src);
- } else {
- tcg_gen_ext16u_tl(dst, src);
- }
- return dst;
-#ifdef TARGET_X86_64
- case MO_32:
- if (sign) {
- tcg_gen_ext32s_tl(dst, src);
- } else {
- tcg_gen_ext32u_tl(dst, src);
- }
- return dst;
-#endif
- default:
+ if (size == MO_TL) {
return src;
}
+ tcg_gen_ext_tl(dst, src, size | (sign ? MO_SIGN : 0));
+ return dst;
}

static void gen_extu(MemOp ot, TCGv reg)
--
2.34.1
We still need to check OS_{BYTE,WORD,LONG},
because m68k includes floating point in OS_*.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/m68k/translate.c | 23 +++--------------------
1 file changed, 3 insertions(+), 20 deletions(-)

diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -XXX,XX +XXX,XX @@ static inline void gen_ext(TCGv res, TCGv val, int opsize, int sign)
{
switch (opsize) {
case OS_BYTE:
- if (sign) {
- tcg_gen_ext8s_i32(res, val);
- } else {
- tcg_gen_ext8u_i32(res, val);
- }
- break;
case OS_WORD:
- if (sign) {
- tcg_gen_ext16s_i32(res, val);
- } else {
- tcg_gen_ext16u_i32(res, val);
- }
- break;
case OS_LONG:
- tcg_gen_mov_i32(res, val);
+ tcg_gen_ext_i32(res, val, opsize | (sign ? MO_SIGN : 0));
break;
default:
g_assert_not_reached();
@@ -XXX,XX +XXX,XX @@ static int gen_ea_mode_fp(CPUM68KState *env, DisasContext *s, int mode,
tmp = tcg_temp_new();
switch (opsize) {
case OS_BYTE:
- tcg_gen_ext8s_i32(tmp, reg);
- gen_helper_exts32(tcg_env, fp, tmp);
- break;
case OS_WORD:
- tcg_gen_ext16s_i32(tmp, reg);
- gen_helper_exts32(tcg_env, fp, tmp);
- break;
case OS_LONG:
- gen_helper_exts32(tcg_env, fp, reg);
+ tcg_gen_ext_i32(tmp, reg, opsize | MO_SIGN);
+ gen_helper_exts32(tcg_env, fp, tmp);
break;
case OS_SINGLE:
gen_helper_extf32(tcg_env, fp, reg);
--
2.34.1
Reviewed-by: Yoshinori Sato <ysato@users.sourceforge.jp>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/rx/translate.c | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/target/rx/translate.c b/target/rx/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/rx/translate.c
+++ b/target/rx/translate.c
@@ -XXX,XX +XXX,XX @@ static bool trans_MOV_ra(DisasContext *ctx, arg_MOV_ra *a)
/* mov.<bwl> rs,rd */
static bool trans_MOV_mm(DisasContext *ctx, arg_MOV_mm *a)
{
- static void (* const mov[])(TCGv ret, TCGv arg) = {
- tcg_gen_ext8s_i32, tcg_gen_ext16s_i32, tcg_gen_mov_i32,
- };
TCGv tmp, mem, addr;
+
if (a->lds == 3 && a->ldd == 3) {
/* mov.<bwl> rs,rd */
- mov[a->sz](cpu_regs[a->rd], cpu_regs[a->rs]);
+ tcg_gen_ext_i32(cpu_regs[a->rd], cpu_regs[a->rs], a->sz | MO_SIGN);
return true;
}

@@ -XXX,XX +XXX,XX @@ static bool trans_MOVU_mr(DisasContext *ctx, arg_MOVU_mr *a)
/* movu.<bw> rs,rd */
static bool trans_MOVU_rr(DisasContext *ctx, arg_MOVU_rr *a)
{
- static void (* const ext[])(TCGv ret, TCGv arg) = {
- tcg_gen_ext8u_i32, tcg_gen_ext16u_i32,
- };
- ext[a->sz](cpu_regs[a->rd], cpu_regs[a->rs]);
+ tcg_gen_ext_i32(cpu_regs[a->rd], cpu_regs[a->rs], a->sz);
return true;
}

--
2.34.1
The EXTR instructions can use the extract opcodes.

Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/tricore/translate.c | 20 ++++----------------
1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/target/tricore/translate.c b/target/tricore/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/tricore/translate.c
+++ b/target/tricore/translate.c
@@ -XXX,XX +XXX,XX @@ static void decode_rrpw_extract_insert(DisasContext *ctx)
switch (op2) {
case OPC2_32_RRPW_EXTR:
if (width == 0) {
- tcg_gen_movi_tl(cpu_gpr_d[r3], 0);
- break;
- }
-
- if (pos + width <= 32) {
- /* optimize special cases */
- if ((pos == 0) && (width == 8)) {
- tcg_gen_ext8s_tl(cpu_gpr_d[r3], cpu_gpr_d[r1]);
- } else if ((pos == 0) && (width == 16)) {
- tcg_gen_ext16s_tl(cpu_gpr_d[r3], cpu_gpr_d[r1]);
- } else {
- tcg_gen_shli_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], 32 - pos - width);
- tcg_gen_sari_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], 32 - width);
- }
+ tcg_gen_movi_tl(cpu_gpr_d[r3], 0);
+ } else if (pos + width <= 32) {
+ tcg_gen_sextract_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], pos, width);
}
break;
case OPC2_32_RRPW_EXTR_U:
if (width == 0) {
tcg_gen_movi_tl(cpu_gpr_d[r3], 0);
} else {
- tcg_gen_shri_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], pos);
- tcg_gen_andi_tl(cpu_gpr_d[r3], cpu_gpr_d[r3], ~0u >> (32-width));
+ tcg_gen_extract_tl(cpu_gpr_d[r3], cpu_gpr_d[r1], pos, width);
}
break;
case OPC2_32_RRPW_IMASK:
--
2.34.1
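For reference, the equivalence relied on here, written out as a sketch (not part of the patch; "dst", "src", "pos" and "width" are illustrative, and TriCore registers are 32-bit): a signed field extract produces the same result as the shift-left/arithmetic-shift-right pair it replaces.

    /* Both forms yield the sign-extended bits [pos, pos + width) of src. */
    tcg_gen_sextract_tl(dst, src, pos, width);

    tcg_gen_shli_tl(dst, src, 32 - pos - width);
    tcg_gen_sari_tl(dst, dst, 32 - width);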
Reviewed-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/xtensa/translate.c | 12 +-----------
1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/xtensa/translate.c
+++ b/target/xtensa/translate.c
@@ -XXX,XX +XXX,XX @@ static void translate_salt(DisasContext *dc, const OpcodeArg arg[],
static void translate_sext(DisasContext *dc, const OpcodeArg arg[],
const uint32_t par[])
{
- int shift = 31 - arg[2].imm;
-
- if (shift == 24) {
- tcg_gen_ext8s_i32(arg[0].out, arg[1].in);
- } else if (shift == 16) {
- tcg_gen_ext16s_i32(arg[0].out, arg[1].in);
- } else {
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_shli_i32(tmp, arg[1].in, shift);
- tcg_gen_sari_i32(arg[0].out, tmp, shift);
- }
+ tcg_gen_sextract_i32(arg[0].out, arg[1].in, 0, arg[2].imm + 1);
}

static uint32_t test_exceptions_simcall(DisasContext *dc,
--
2.34.1