The following changes since commit a36d64f43325fa503075cc9408ddabb69b32f829:

  Merge remote-tracking branch 'remotes/stsquad/tags/pull-testing-and-gdbstub-060520-1' into staging (2020-05-06 14:06:00 +0100)

are available in the Git repository at:

  https://github.com/rth7680/qemu.git tags/pull-tcg-20200506

for you to fetch changes up to 07dada0336a83002dfa8673a9220a88e13d9a45c:

  tcg: Fix integral argument type to tcg_gen_rot[rl]i_i{32,64} (2020-05-06 09:25:10 -0700)

----------------------------------------------------------------
Add tcg_gen_gvec_dup_imm
Misc tcg patches

----------------------------------------------------------------
Richard Henderson (10):
      tcg: Add tcg_gen_gvec_dup_imm
      target/s390x: Use tcg_gen_gvec_dup_imm
      target/ppc: Use tcg_gen_gvec_dup_imm
      target/arm: Use tcg_gen_gvec_dup_imm
      tcg: Use tcg_gen_gvec_dup_imm in logical simplifications
      tcg: Remove tcg_gen_gvec_dup{8,16,32,64}i
      tcg: Add tcg_gen_gvec_dup_tl
      tcg: Improve vector tail clearing
      tcg: Add load_dest parameter to GVecGen2
      tcg: Fix integral argument type to tcg_gen_rot[rl]i_i{32,64}

 include/tcg/tcg-op-gvec.h           |  13 ++-
 include/tcg/tcg-op.h                |   8 +-
 target/arm/translate-a64.c          |  10 +--
 target/arm/translate-sve.c          |  12 ++-
 target/arm/translate.c              |   9 +-
 target/ppc/translate/vmx-impl.inc.c |  32 +++----
 target/ppc/translate/vsx-impl.inc.c |   2 +-
 target/s390x/translate_vx.inc.c     |  41 ++-------
 tcg/tcg-op-gvec.c                   | 162 +++++++++++++++++++++++-------------
 tcg/tcg-op.c                        |  16 ++--
 10 files changed, 166 insertions(+), 139 deletions(-)

Add a version of tcg_gen_dup_* that takes both an immediate and
a vector element size operand. This will replace the set of
tcg_gen_gvec_dup{8,16,32,64}i functions that encode the element
size within the function name.

Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-op-gvec.h | 2 ++
 tcg/tcg-op-gvec.c         | 7 +++++++
 2 files changed, 9 insertions(+)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,
 
 void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
                           uint32_t s, uint32_t m);
+void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t s,
+                          uint32_t m, uint64_t imm);
 void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t s,
                           uint32_t m, TCGv_i32);
 void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t s,
                           uint32_t m, TCGv_i64);
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup8i(uint32_t dofs, uint32_t oprsz,
     do_dup(MO_8, dofs, oprsz, maxsz, NULL, NULL, x);
 }
 
+void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t oprsz,
+                          uint32_t maxsz, uint64_t x)
+{
+    check_size_align(oprsz, maxsz, dofs);
+    do_dup(vece, dofs, oprsz, maxsz, NULL, NULL, x);
+}
+
 void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs,
                       uint32_t oprsz, uint32_t maxsz)
 {
-- 
2.20.1

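To make the new interface concrete, a hypothetical call site; the
destination offset dofs stands in for a target's vec_full_reg_offset()
result, and the immediate is truncated to the element size and then
replicated per element:

    /* Splat the byte 0x2a across a 16-byte vector at env offset dofs. */
    tcg_gen_gvec_dup_imm(MO_8, dofs, 16, 16, 0x2a);
    /* Same destination, now as two 64-bit elements of value 1. */
    tcg_gen_gvec_dup_imm(MO_64, dofs, 16, 16, 1);
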
The gen_gvec_dupi switch is unnecessary with the new function.
Replace it with a local gen_gvec_dup_imm that takes care of the
register to offset conversion and length arguments.

Drop zero_vec and use gen_gvec_dup_imm with 0.

Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/s390x/translate_vx.inc.c | 41 +++++++--------------------------
 1 file changed, 8 insertions(+), 33 deletions(-)

diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -XXX,XX +XXX,XX @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
 #define gen_gvec_mov(v1, v2) \
     tcg_gen_gvec_mov(0, vec_full_reg_offset(v1), vec_full_reg_offset(v2), 16, \
                      16)
-#define gen_gvec_dup64i(v1, c) \
-    tcg_gen_gvec_dup64i(vec_full_reg_offset(v1), 16, 16, c)
+#define gen_gvec_dup_imm(es, v1, c) \
+    tcg_gen_gvec_dup_imm(es, vec_full_reg_offset(v1), 16, 16, c);
 #define gen_gvec_fn_2(fn, es, v1, v2) \
     tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       16, 16)
@@ -XXX,XX +XXX,XX @@ static void gen_gvec128_4_i64(gen_gvec128_4_i64_fn fn, uint8_t d, uint8_t a,
     tcg_temp_free_i64(cl);
 }
 
-static void gen_gvec_dupi(uint8_t es, uint8_t reg, uint64_t c)
-{
-    switch (es) {
-    case ES_8:
-        tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, c);
-        break;
-    case ES_16:
-        tcg_gen_gvec_dup16i(vec_full_reg_offset(reg), 16, 16, c);
-        break;
-    case ES_32:
-        tcg_gen_gvec_dup32i(vec_full_reg_offset(reg), 16, 16, c);
-        break;
-    case ES_64:
-        gen_gvec_dup64i(reg, c);
-        break;
-    default:
-        g_assert_not_reached();
-    }
-}
-
-static void zero_vec(uint8_t reg)
-{
-    tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, 0);
-}
-
 static void gen_addi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
                           uint64_t b)
 {
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o)
          * Masks for both 64 bit elements of the vector are the same.
          * Trust tcg to produce a good constant loading.
          */
-        gen_gvec_dup64i(get_field(s, v1),
-                        generate_byte_mask(i2 & 0xff));
+        gen_gvec_dup_imm(ES_64, get_field(s, v1),
+                         generate_byte_mask(i2 & 0xff));
     } else {
         TCGv_i64 t = tcg_temp_new_i64();
 
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vgm(DisasContext *s, DisasOps *o)
     }
 
-    gen_gvec_dupi(es, get_field(s, v1), mask);
+    gen_gvec_dup_imm(es, get_field(s, v1), mask);
     return DISAS_NEXT;
 }
 
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vllez(DisasContext *s, DisasOps *o)
 
     t = tcg_temp_new_i64();
     tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TE | es);
-    zero_vec(get_field(s, v1));
+    gen_gvec_dup_imm(es, get_field(s, v1), 0);
     write_vec_element_i64(t, get_field(s, v1), enr, es);
     tcg_temp_free_i64(t);
     return DISAS_NEXT;
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o)
         return DISAS_NORETURN;
     }
 
-    gen_gvec_dupi(es, get_field(s, v1), data);
+    gen_gvec_dup_imm(es, get_field(s, v1), data);
     return DISAS_NEXT;
 }
 
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vcksm(DisasContext *s, DisasOps *o)
         read_vec_element_i32(tmp, get_field(s, v2), i, ES_32);
         tcg_gen_add2_i32(tmp, sum, sum, sum, tmp, tmp);
     }
-    zero_vec(get_field(s, v1));
+    gen_gvec_dup_imm(ES_32, get_field(s, v1), 0);
     write_vec_element_i32(sum, get_field(s, v1), 1, ES_32);
 
     tcg_temp_free_i32(tmp);
-- 
2.20.1

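The switch is removable because the ES_* element-size constants in
translate_vx.inc.c are defined in terms of the matching MO_* values (an
assumption worth checking against the file), so the decoded element size
can be passed through unchanged. The macro above then supplies the
register-to-offset conversion and the 16/16 length arguments:

    /* gen_gvec_dup_imm(ES_32, v1, 0) expands, per the macro, to: */
    tcg_gen_gvec_dup_imm(MO_32, vec_full_reg_offset(v1), 16, 16, 0);
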
We can now unify the implementation of the 3 VSPLTI instructions.

Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/ppc/translate/vmx-impl.inc.c | 32 ++++++++++++++++-------------
 target/ppc/translate/vsx-impl.inc.c |  2 +-
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -XXX,XX +XXX,XX @@ GEN_VXRFORM_DUAL(vcmpbfp, PPC_ALTIVEC, PPC_NONE, \
 GEN_VXRFORM_DUAL(vcmpgtfp, PPC_ALTIVEC, PPC_NONE, \
                  vcmpgtud, PPC_NONE, PPC2_ALTIVEC_207)
 
-#define GEN_VXFORM_DUPI(name, tcg_op, opc2, opc3) \
-static void glue(gen_, name)(DisasContext *ctx) \
-    { \
-        int simm; \
-        if (unlikely(!ctx->altivec_enabled)) { \
-            gen_exception(ctx, POWERPC_EXCP_VPU); \
-            return; \
-        } \
-        simm = SIMM5(ctx->opcode); \
-        tcg_op(avr_full_offset(rD(ctx->opcode)), 16, 16, simm); \
+static void gen_vsplti(DisasContext *ctx, int vece)
+{
+    int simm;
+
+    if (unlikely(!ctx->altivec_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_VPU);
+        return;
     }
 
-GEN_VXFORM_DUPI(vspltisb, tcg_gen_gvec_dup8i, 6, 12);
-GEN_VXFORM_DUPI(vspltish, tcg_gen_gvec_dup16i, 6, 13);
-GEN_VXFORM_DUPI(vspltisw, tcg_gen_gvec_dup32i, 6, 14);
+    simm = SIMM5(ctx->opcode);
+    tcg_gen_gvec_dup_imm(vece, avr_full_offset(rD(ctx->opcode)), 16, 16, simm);
+}
+
+#define GEN_VXFORM_VSPLTI(name, vece, opc2, opc3) \
+static void glue(gen_, name)(DisasContext *ctx) { gen_vsplti(ctx, vece); }
+
+GEN_VXFORM_VSPLTI(vspltisb, MO_8, 6, 12);
+GEN_VXFORM_VSPLTI(vspltish, MO_16, 6, 13);
+GEN_VXFORM_VSPLTI(vspltisw, MO_32, 6, 14);
 
 #define GEN_VXFORM_NOA(name, opc2, opc3) \
 static void glue(gen_, name)(DisasContext *ctx) \
@@ -XXX,XX +XXX,XX @@ GEN_VXFORM_DUAL(vsldoi, PPC_ALTIVEC, PPC_NONE,
 #undef GEN_VXRFORM_DUAL
 #undef GEN_VXRFORM1
 #undef GEN_VXRFORM
-#undef GEN_VXFORM_DUPI
+#undef GEN_VXFORM_VSPLTI
 #undef GEN_VXFORM_NOA
 #undef GEN_VXFORM_UIMM
 #undef GEN_VAFORM_PAIRED
diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/ppc/translate/vsx-impl.inc.c
+++ b/target/ppc/translate/vsx-impl.inc.c
@@ -XXX,XX +XXX,XX @@ static void gen_xxspltib(DisasContext *ctx)
             return;
         }
     }
-    tcg_gen_gvec_dup8i(vsr_full_offset(rt), 16, 16, uim8);
+    tcg_gen_gvec_dup_imm(MO_8, vsr_full_offset(rt), 16, 16, uim8);
 }
 
 static void gen_xxsldwi(DisasContext *ctx)
-- 
2.20.1

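As a worked example of the splat semantics being unified here: vspltisb
with simm = -1 fills all 16 bytes of the Altivec register with 0xff,
while vspltisw with the same immediate produces four 0xffffffff words.
Both now lower to the same call, differing only in vece:

    /* vspltisw vD,-1 (conceptually): */
    tcg_gen_gvec_dup_imm(MO_32, avr_full_offset(rD(ctx->opcode)), 16, 16, -1);
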
In a few cases, we're able to remove some manual replication.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-a64.c | 10 +++++-----
 target/arm/translate-sve.c | 12 +++++-------
 target/arm/translate.c     |  9 ++++++---
 3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static void clear_vec_high(DisasContext *s, bool is_q, int rd)
         tcg_temp_free_i64(tcg_zero);
     }
     if (vsz > 16) {
-        tcg_gen_gvec_dup8i(ofs + 16, vsz - 16, vsz - 16, 0);
+        tcg_gen_gvec_dup_imm(MO_64, ofs + 16, vsz - 16, vsz - 16, 0);
     }
 }
 
@@ -XXX,XX +XXX,XX @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
 
     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
         /* MOVI or MVNI, with MVNI negation handled above. */
-        tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), is_q ? 16 : 8,
-                            vec_full_reg_size(s), imm);
+        tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
+                             vec_full_reg_size(s), imm);
     } else {
         /* ORR or BIC, with BIC negation to AND handled above. */
         if (is_neg) {
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
     if (is_u) {
         if (shift == 8 << size) {
             /* Shift count the same size as element size produces zero. */
-            tcg_gen_gvec_dup8i(vec_full_reg_offset(s, rd),
-                               is_q ? 16 : 8, vec_full_reg_size(s), 0);
+            tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
+                                 is_q ? 16 : 8, vec_full_reg_size(s), 0);
         } else {
             gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size);
         }
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -XXX,XX +XXX,XX @@ static bool do_mov_z(DisasContext *s, int rd, int rn)
 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
 {
     unsigned vsz = vec_full_reg_size(s);
-    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
+    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
 }
 
 /* Invoke a vector expander on two Pregs. */
@@ -XXX,XX +XXX,XX @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
         unsigned oprsz = size_for_gvec(setsz / 8);
 
         if (oprsz * 8 == setsz) {
-            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
+            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
             goto done;
         }
     }
@@ -XXX,XX +XXX,XX @@ static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
             unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
             tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
         } else {
-            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
+            tcg_gen_gvec_dup_imm(esz, dofs, vsz, vsz, 0);
         }
     }
     return true;
@@ -XXX,XX +XXX,XX @@ static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
 
         /* Decode the VFP immediate. */
         imm = vfp_expand_imm(a->esz, a->imm);
-        imm = dup_const(a->esz, imm);
-
-        tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
+        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
     }
     return true;
 }
@@ -XXX,XX +XXX,XX @@ static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
         unsigned vsz = vec_full_reg_size(s);
         int dofs = vec_full_reg_offset(s, a->rd);
 
-        tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
+        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
     }
     return true;
 }
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                                           MIN(shift, (8 << size) - 1),
                                           vec_size, vec_size);
                     } else if (shift >= 8 << size) {
-                        tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
+                        tcg_gen_gvec_dup_imm(MO_8, rd_ofs, vec_size,
+                                             vec_size, 0);
                     } else {
                         tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
                                           vec_size, vec_size);
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                          * architecturally valid and results in zero.
                          */
                         if (shift >= 8 << size) {
-                            tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
+                            tcg_gen_gvec_dup_imm(size, rd_ofs,
+                                                 vec_size, vec_size, 0);
                         } else {
                             tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
                                               vec_size, vec_size);
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                 }
                 tcg_temp_free_i64(t64);
             } else {
-                tcg_gen_gvec_dup32i(reg_ofs, vec_size, vec_size, imm);
+                tcg_gen_gvec_dup_imm(MO_32, reg_ofs, vec_size,
+                                     vec_size, imm);
             }
         }
     }
-- 
2.20.1

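The manual replication removed in trans_FDUP and trans_DUP_i is the
dup_const() step: the caller used to pre-replicate the immediate into a
64-bit pattern (e.g. dup_const(MO_8, 0x2a) == 0x2a2a2a2a2a2a2a2aull) and
splat the result as MO_64 elements. tcg_gen_gvec_dup_imm performs that
replication internally from the unmodified immediate and the real
element size:

    /* before */
    tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
    /* after */
    tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
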
Replace the outgoing interface.

Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg-op-gvec.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_xor(unsigned vece, uint32_t dofs, uint32_t aofs,
     };
 
     if (aofs == bofs) {
-        tcg_gen_gvec_dup8i(dofs, oprsz, maxsz, 0);
+        tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, 0);
     } else {
         tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
     }
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_andc(unsigned vece, uint32_t dofs, uint32_t aofs,
     };
 
     if (aofs == bofs) {
-        tcg_gen_gvec_dup8i(dofs, oprsz, maxsz, 0);
+        tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, 0);
     } else {
         tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
     }
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_orc(unsigned vece, uint32_t dofs, uint32_t aofs,
     };
 
     if (aofs == bofs) {
-        tcg_gen_gvec_dup8i(dofs, oprsz, maxsz, -1);
+        tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, -1);
     } else {
         tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
     }
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_eqv(unsigned vece, uint32_t dofs, uint32_t aofs,
     };
 
     if (aofs == bofs) {
-        tcg_gen_gvec_dup8i(dofs, oprsz, maxsz, -1);
+        tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, -1);
     } else {
         tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
     }
-- 
2.20.1

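These are the aliased-operand identities x ^ x == 0, x & ~x == 0,
x | ~x == -1 and ~(x ^ x) == -1. An all-zeros or all-ones pattern is the
same bit pattern at every element size, which is why the replacement can
fix vece to MO_64 regardless of the vece the caller passed in:

    /* Replication to 64 bits is size-independent for 0 and -1: */
    dup_const(MO_8, -1) == dup_const(MO_64, -1)  /* 0xffffffffffffffffull */
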
These interfaces are now unused.

Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-op-gvec.h |  5 -----
 tcg/tcg-op-gvec.c         | 28 ----------------------------
 2 files changed, 33 deletions(-)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t s,
 void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t s,
                           uint32_t m, TCGv_i64);
 
-void tcg_gen_gvec_dup8i(uint32_t dofs, uint32_t s, uint32_t m, uint8_t x);
-void tcg_gen_gvec_dup16i(uint32_t dofs, uint32_t s, uint32_t m, uint16_t x);
-void tcg_gen_gvec_dup32i(uint32_t dofs, uint32_t s, uint32_t m, uint32_t x);
-void tcg_gen_gvec_dup64i(uint32_t dofs, uint32_t s, uint32_t m, uint64_t x);
-
 void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t shift, uint32_t oprsz, uint32_t maxsz);
 void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
     }
 }
 
-void tcg_gen_gvec_dup64i(uint32_t dofs, uint32_t oprsz,
-                         uint32_t maxsz, uint64_t x)
-{
-    check_size_align(oprsz, maxsz, dofs);
-    do_dup(MO_64, dofs, oprsz, maxsz, NULL, NULL, x);
-}
-
-void tcg_gen_gvec_dup32i(uint32_t dofs, uint32_t oprsz,
-                         uint32_t maxsz, uint32_t x)
-{
-    check_size_align(oprsz, maxsz, dofs);
-    do_dup(MO_32, dofs, oprsz, maxsz, NULL, NULL, x);
-}
-
-void tcg_gen_gvec_dup16i(uint32_t dofs, uint32_t oprsz,
-                         uint32_t maxsz, uint16_t x)
-{
-    check_size_align(oprsz, maxsz, dofs);
-    do_dup(MO_16, dofs, oprsz, maxsz, NULL, NULL, x);
-}
-
-void tcg_gen_gvec_dup8i(uint32_t dofs, uint32_t oprsz,
-                        uint32_t maxsz, uint8_t x)
-{
-    check_size_align(oprsz, maxsz, dofs);
-    do_dup(MO_8, dofs, oprsz, maxsz, NULL, NULL, x);
-}
-
 void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t oprsz,
                           uint32_t maxsz, uint64_t x)
 {
-- 
2.20.1

For use when a target needs to pass a configure-specific
target_ulong value to duplicate.

Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-op-gvec.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t s,
 void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t s,
                           uint32_t m, TCGv_i64);
 
+#if TARGET_LONG_BITS == 64
+# define tcg_gen_gvec_dup_tl tcg_gen_gvec_dup_i64
+#else
+# define tcg_gen_gvec_dup_tl tcg_gen_gvec_dup_i32
+#endif
+
 void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t shift, uint32_t oprsz, uint32_t maxsz);
 void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
-- 
2.20.1

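A sketch of the intended use in a target translator, where a TCGv is
TCGv_i32 or TCGv_i64 depending on configuration (offsets and sizes here
are hypothetical):

    TCGv val = tcg_temp_new();
    /* ... compute the target_ulong value to broadcast ... */
    tcg_gen_gvec_dup_tl(MO_32, dofs, oprsz, maxsz, val);

The macro resolves to tcg_gen_gvec_dup_i64 or tcg_gen_gvec_dup_i32 to
match, so the same source compiles for both 32-bit and 64-bit targets.
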
Better handling of non-power-of-2 tails as seen with Arm 8-byte
vector operations.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg-op-gvec.c | 82 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 63 insertions(+), 19 deletions(-)

diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
    in units of LNSZ.  This limits the expansion of inline code.  */
 static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz)
 {
-    if (oprsz % lnsz == 0) {
-        uint32_t lnct = oprsz / lnsz;
-        return lnct >= 1 && lnct <= MAX_UNROLL;
+    uint32_t q, r;
+
+    if (oprsz < lnsz) {
+        return false;
     }
-    return false;
+
+    q = oprsz / lnsz;
+    r = oprsz % lnsz;
+    tcg_debug_assert((r & 7) == 0);
+
+    if (lnsz < 16) {
+        /* For sizes below 16, accept no remainder. */
+        if (r != 0) {
+            return false;
+        }
+    } else {
+        /*
+         * Recall that ARM SVE allows vector sizes that are not a
+         * power of 2, but always a multiple of 16.  The intent is
+         * that e.g. size == 80 would be expanded with 2x32 + 1x16.
+         * In addition, expand_clr needs to handle a multiple of 8.
+         * Thus we can handle the tail with one more operation per
+         * diminishing power of 2.
+         */
+        q += ctpop32(r);
+    }
+
+    return q <= MAX_UNROLL;
 }
 
 static void expand_clr(uint32_t dofs, uint32_t maxsz);
@@ -XXX,XX +XXX,XX @@ static void gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in)
 static TCGType choose_vector_type(const TCGOpcode *list, unsigned vece,
                                   uint32_t size, bool prefer_i64)
 {
-    if (TCG_TARGET_HAS_v256 && check_size_impl(size, 32)) {
-        /*
-         * Recall that ARM SVE allows vector sizes that are not a
-         * power of 2, but always a multiple of 16.  The intent is
-         * that e.g. size == 80 would be expanded with 2x32 + 1x16.
-         * It is hard to imagine a case in which v256 is supported
-         * but v128 is not, but check anyway.
-         */
-        if (tcg_can_emit_vecop_list(list, TCG_TYPE_V256, vece)
-            && (size % 32 == 0
-                || tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece))) {
-            return TCG_TYPE_V256;
-        }
+    /*
+     * Recall that ARM SVE allows vector sizes that are not a
+     * power of 2, but always a multiple of 16.  The intent is
+     * that e.g. size == 80 would be expanded with 2x32 + 1x16.
+     * It is hard to imagine a case in which v256 is supported
+     * but v128 is not, but check anyway.
+     * In addition, expand_clr needs to handle a multiple of 8.
+     */
+    if (TCG_TARGET_HAS_v256 &&
+        check_size_impl(size, 32) &&
+        tcg_can_emit_vecop_list(list, TCG_TYPE_V256, vece) &&
+        (!(size & 16) ||
+         (TCG_TARGET_HAS_v128 &&
+          tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece))) &&
+        (!(size & 8) ||
+         (TCG_TARGET_HAS_v64 &&
+          tcg_can_emit_vecop_list(list, TCG_TYPE_V64, vece)))) {
+        return TCG_TYPE_V256;
     }
-    if (TCG_TARGET_HAS_v128 && check_size_impl(size, 16)
-        && tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece)) {
+    if (TCG_TARGET_HAS_v128 &&
+        check_size_impl(size, 16) &&
+        tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece) &&
+        (!(size & 8) ||
+         (TCG_TARGET_HAS_v64 &&
+          tcg_can_emit_vecop_list(list, TCG_TYPE_V64, vece)))) {
         return TCG_TYPE_V128;
     }
     if (TCG_TARGET_HAS_v64 && !prefer_i64 && check_size_impl(size, 8)
@@ -XXX,XX +XXX,XX @@ static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz,
 {
     uint32_t i = 0;
 
+    tcg_debug_assert(oprsz >= 8);
+
+    /*
+     * This may be expand_clr for the tail of an operation, e.g.
+     * oprsz == 8 && maxsz == 64.  The first 8 bytes of this store
+     * are misaligned wrt the maximum vector size, so do that first.
+     */
+    if (dofs & 8) {
+        tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64);
+        i += 8;
+    }
+
     switch (type) {
     case TCG_TYPE_V256:
         /*
-- 
2.20.1

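A worked instance of the new accounting in check_size_impl(): each set
bit in the remainder costs one extra tail operation, and the total is
compared against MAX_UNROLL.

    /* oprsz = 80, lnsz = 32:  q = 2, r = 16, ctpop32(16) = 1 -> 3 ops
       oprsz = 88, lnsz = 32:  q = 2, r = 24, ctpop32(24) = 2 -> 4 ops
                               (2x32 + 1x16 + 1x8)                      */
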
We have this same parameter for GVecGen2i, GVecGen3,
and GVecGen3i. This will make some SVE2 insns easier
to parameterize.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-op-gvec.h |  2 ++
 tcg/tcg-op-gvec.c         | 45 ++++++++++++++++++++++++++++-----------
 2 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -XXX,XX +XXX,XX @@ typedef struct {
     uint8_t vece;
     /* Prefer i64 to v64. */
     bool prefer_i64;
+    /* Load dest as a 2nd source operand. */
+    bool load_dest;
 } GVecGen2;
 
 typedef struct {
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -XXX,XX +XXX,XX @@ static void expand_clr(uint32_t dofs, uint32_t maxsz)
 
 /* Expand OPSZ bytes worth of two-operand operations using i32 elements. */
 static void expand_2_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
-                         void (*fni)(TCGv_i32, TCGv_i32))
+                         bool load_dest, void (*fni)(TCGv_i32, TCGv_i32))
 {
     TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t1 = tcg_temp_new_i32();
     uint32_t i;
 
     for (i = 0; i < oprsz; i += 4) {
         tcg_gen_ld_i32(t0, cpu_env, aofs + i);
-        fni(t0, t0);
-        tcg_gen_st_i32(t0, cpu_env, dofs + i);
+        if (load_dest) {
+            tcg_gen_ld_i32(t1, cpu_env, dofs + i);
+        }
+        fni(t1, t0);
+        tcg_gen_st_i32(t1, cpu_env, dofs + i);
     }
     tcg_temp_free_i32(t0);
+    tcg_temp_free_i32(t1);
 }
 
 static void expand_2i_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
@@ -XXX,XX +XXX,XX @@ static void expand_4_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
 
 /* Expand OPSZ bytes worth of two-operand operations using i64 elements. */
 static void expand_2_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
-                         void (*fni)(TCGv_i64, TCGv_i64))
+                         bool load_dest, void (*fni)(TCGv_i64, TCGv_i64))
 {
     TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
     uint32_t i;
 
     for (i = 0; i < oprsz; i += 8) {
         tcg_gen_ld_i64(t0, cpu_env, aofs + i);
-        fni(t0, t0);
-        tcg_gen_st_i64(t0, cpu_env, dofs + i);
+        if (load_dest) {
+            tcg_gen_ld_i64(t1, cpu_env, dofs + i);
+        }
+        fni(t1, t0);
+        tcg_gen_st_i64(t1, cpu_env, dofs + i);
     }
     tcg_temp_free_i64(t0);
+    tcg_temp_free_i64(t1);
 }
 
 static void expand_2i_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
@@ -XXX,XX +XXX,XX @@ static void expand_4_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
 /* Expand OPSZ bytes worth of two-operand operations using host vectors. */
 static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
                          uint32_t oprsz, uint32_t tysz, TCGType type,
+                         bool load_dest,
                          void (*fni)(unsigned, TCGv_vec, TCGv_vec))
 {
     TCGv_vec t0 = tcg_temp_new_vec(type);
+    TCGv_vec t1 = tcg_temp_new_vec(type);
     uint32_t i;
 
     for (i = 0; i < oprsz; i += tysz) {
         tcg_gen_ld_vec(t0, cpu_env, aofs + i);
-        fni(vece, t0, t0);
-        tcg_gen_st_vec(t0, cpu_env, dofs + i);
+        if (load_dest) {
+            tcg_gen_ld_vec(t1, cpu_env, dofs + i);
+        }
+        fni(vece, t1, t0);
+        tcg_gen_st_vec(t1, cpu_env, dofs + i);
     }
     tcg_temp_free_vec(t0);
+    tcg_temp_free_vec(t1);
 }
 
 /* Expand OPSZ bytes worth of two-vector operands and an immediate operand
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
          * that e.g. size == 80 would be expanded with 2x32 + 1x16.
          */
         some = QEMU_ALIGN_DOWN(oprsz, 32);
-        expand_2_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, g->fniv);
+        expand_2_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256,
+                     g->load_dest, g->fniv);
         if (some == oprsz) {
             break;
         }
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
         maxsz -= some;
         /* fallthru */
     case TCG_TYPE_V128:
-        expand_2_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, g->fniv);
+        expand_2_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128,
+                     g->load_dest, g->fniv);
         break;
     case TCG_TYPE_V64:
-        expand_2_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, g->fniv);
+        expand_2_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64,
+                     g->load_dest, g->fniv);
         break;
 
     case 0:
         if (g->fni8 && check_size_impl(oprsz, 8)) {
-            expand_2_i64(dofs, aofs, oprsz, g->fni8);
+            expand_2_i64(dofs, aofs, oprsz, g->load_dest, g->fni8);
         } else if (g->fni4 && check_size_impl(oprsz, 4)) {
-            expand_2_i32(dofs, aofs, oprsz, g->fni4);
+            expand_2_i32(dofs, aofs, oprsz, g->load_dest, g->fni4);
         } else {
             assert(g->fno != NULL);
             tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, g->data, g->fno);
-- 
2.20.1

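A sketch of how a front end might use the new flag for an accumulating
unary operation (the expander names here are hypothetical):

    static const GVecGen2 g = {
        .fni8 = gen_acc_i64,     /* called as fni(dest, src) */
        .fniv = gen_acc_vec,
        .load_dest = true,       /* dest is loaded before fni runs */
        .vece = MO_64,
    };
    tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g);

With load_dest set, the destination's previous contents arrive in the
first operand of fni8/fni4/fniv, mirroring what GVecGen3's load_dest
already provides for three-operand expansions.
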
For the benefit of compatibility of function pointer types,
we have standardized on int32_t and int64_t as the integral
argument to tcg expanders.

We converted most of them in 474b2e8f0f7, but missed the rotates.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-op.h |  8 ++++----
 tcg/tcg-op.c         | 16 ++++++++--------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2);
 void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg);
 void tcg_gen_ctpop_i32(TCGv_i32 a1, TCGv_i32 a2);
 void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
-void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
+void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
 void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
-void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
+void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
 void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
                          unsigned int ofs, unsigned int len);
 void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2);
 void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg);
 void tcg_gen_ctpop_i64(TCGv_i64 a1, TCGv_i64 a2);
 void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
-void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
+void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
 void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
-void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
+void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
 void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
                          unsigned int ofs, unsigned int len);
 void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
     }
 }
 
-void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
-    tcg_debug_assert(arg2 < 32);
+    tcg_debug_assert(arg2 >= 0 && arg2 < 32);
     /* some cases can be optimized here */
     if (arg2 == 0) {
         tcg_gen_mov_i32(ret, arg1);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
     }
 }
 
-void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
-    tcg_debug_assert(arg2 < 32);
+    tcg_debug_assert(arg2 >= 0 && arg2 < 32);
     /* some cases can be optimized here */
     if (arg2 == 0) {
         tcg_gen_mov_i32(ret, arg1);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
     }
 }
 
-void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 {
-    tcg_debug_assert(arg2 < 64);
+    tcg_debug_assert(arg2 >= 0 && arg2 < 64);
     /* some cases can be optimized here */
     if (arg2 == 0) {
         tcg_gen_mov_i64(ret, arg1);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
     }
 }
 
-void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 {
-    tcg_debug_assert(arg2 < 64);
+    tcg_debug_assert(arg2 >= 0 && arg2 < 64);
     /* some cases can be optimized here */
     if (arg2 == 0) {
         tcg_gen_mov_i64(ret, arg1);
-- 
2.20.1

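Note the paired assertion change: with an unsigned parameter, a negative
rotate count wrapped to a huge value and tripped arg2 < 32 on its own;
with int32_t, the explicit arg2 >= 0 keeps catching it. The payoff is a
signature uniform with the other immediate-shift expanders, so rotates
can share function-pointer tables; a hypothetical sketch:

    typedef void ShiftImmFn(TCGv_i32, TCGv_i32, int32_t);
    static ShiftImmFn * const fns[2] = { tcg_gen_shli_i32, tcg_gen_rotli_i32 };
    fns[is_rotate](dst, src, shift);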