The following changes since commit a36d64f43325fa503075cc9408ddabb69b32f829:

  Merge remote-tracking branch 'remotes/stsquad/tags/pull-testing-and-gdbstub-060520-1' into staging (2020-05-06 14:06:00 +0100)

are available in the Git repository at:

  https://github.com/rth7680/qemu.git tags/pull-tcg-20200506

for you to fetch changes up to 07dada0336a83002dfa8673a9220a88e13d9a45c:

  tcg: Fix integral argument type to tcg_gen_rot[rl]i_i{32,64} (2020-05-06 09:25:10 -0700)

----------------------------------------------------------------
Add tcg_gen_gvec_dup_imm
Misc tcg patches

----------------------------------------------------------------
Richard Henderson (10):
      tcg: Add tcg_gen_gvec_dup_imm
      target/s390x: Use tcg_gen_gvec_dup_imm
      target/ppc: Use tcg_gen_gvec_dup_imm
      target/arm: Use tcg_gen_gvec_dup_imm
      tcg: Use tcg_gen_gvec_dup_imm in logical simplifications
      tcg: Remove tcg_gen_gvec_dup{8,16,32,64}i
      tcg: Add tcg_gen_gvec_dup_tl
      tcg: Improve vector tail clearing
      tcg: Add load_dest parameter to GVecGen2
      tcg: Fix integral argument type to tcg_gen_rot[rl]i_i{32,64}

 include/tcg/tcg-op-gvec.h           |  13 ++-
 include/tcg/tcg-op.h                |   8 +-
 target/arm/translate-a64.c          |  10 +--
 target/arm/translate-sve.c          |  12 ++-
 target/arm/translate.c              |   9 +-
 target/ppc/translate/vmx-impl.inc.c |  32 +++----
 target/ppc/translate/vsx-impl.inc.c |   2 +-
 target/s390x/translate_vx.inc.c     |  41 ++-------
 tcg/tcg-op-gvec.c                   | 162 +++++++++++++++++++++++-------------
 tcg/tcg-op.c                        |  16 ++--
 10 files changed, 166 insertions(+), 139 deletions(-)

Folding in a target/alpha patch since both queues
are singletons this time.

r~

The following changes since commit 25d75c99b2e5941c67049ee776efdb226414f4c6:

  Merge remote-tracking branch 'remotes/xtensa/tags/20210403-xtensa' into staging (2021-04-04 21:48:45 +0100)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20210405

for you to fetch changes up to ef951ee33fba780dd6c2b7f8ff25c84c3f87a6b8:

  target/alpha: fix icount handling for timer instructions (2021-04-05 07:32:56 -0700)

----------------------------------------------------------------
tcg/mips tlb lookup fix
target/alpha icount fix

----------------------------------------------------------------
Kele Huang (1):
      tcg/mips: Fix SoftTLB comparison on mips backend

Pavel Dovgalyuk (1):
      target/alpha: fix icount handling for timer instructions

 target/alpha/translate.c  | 9 +++++++--
 tcg/mips/tcg-target.c.inc | 2 +-
 2 files changed, 8 insertions(+), 3 deletions(-)
Deleted patch

Add a version of tcg_gen_dup_* that takes both an immediate and
a vector element size operand.  This will replace the set of
tcg_gen_gvec_dup{8,16,32,64}i functions that encode the element
size within the function name.

Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-op-gvec.h | 2 ++
 tcg/tcg-op-gvec.c         | 7 +++++++
 2 files changed, 9 insertions(+)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,
 
 void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
                           uint32_t s, uint32_t m);
+void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t s,
+                          uint32_t m, uint64_t imm);
 void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t s,
                           uint32_t m, TCGv_i32);
 void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t s,
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup8i(uint32_t dofs, uint32_t oprsz,
     do_dup(MO_8, dofs, oprsz, maxsz, NULL, NULL, x);
 }
 
+void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t oprsz,
+                          uint32_t maxsz, uint64_t x)
+{
+    check_size_align(oprsz, maxsz, dofs);
+    do_dup(vece, dofs, oprsz, maxsz, NULL, NULL, x);
+}
+
 void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs,
                       uint32_t oprsz, uint32_t maxsz)
 {
--
2.20.1
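For readers following the API change, a sketch of what a call site looks like with the new entry point (illustrative only, not taken from the series; dofs stands for a vector register offset obtained from a target's vec_full_reg_offset()-style helper):

    /* Broadcast the byte 0x12 into a 16-byte vector at offset dofs.
     * The element size is now chosen by the MemOp argument rather
     * than encoded in the function name. */
    tcg_gen_gvec_dup_imm(MO_8, dofs, 16, 16, 0x12);
    /* Old-style equivalent: tcg_gen_gvec_dup8i(dofs, 16, 16, 0x12); */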
Deleted patch

The gen_gvec_dupi switch is unnecessary with the new function.
Replace it with a local gen_gvec_dup_imm that takes care of the
register to offset conversion and length arguments.

Drop zero_vec and use gen_gvec_dup_imm with 0.

Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/s390x/translate_vx.inc.c | 41 +++++++--------------------------
 1 file changed, 8 insertions(+), 33 deletions(-)

diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -XXX,XX +XXX,XX @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
 #define gen_gvec_mov(v1, v2) \
     tcg_gen_gvec_mov(0, vec_full_reg_offset(v1), vec_full_reg_offset(v2), 16, \
                      16)
-#define gen_gvec_dup64i(v1, c) \
-    tcg_gen_gvec_dup64i(vec_full_reg_offset(v1), 16, 16, c)
+#define gen_gvec_dup_imm(es, v1, c) \
+    tcg_gen_gvec_dup_imm(es, vec_full_reg_offset(v1), 16, 16, c);
 #define gen_gvec_fn_2(fn, es, v1, v2) \
     tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
                       16, 16)
@@ -XXX,XX +XXX,XX @@ static void gen_gvec128_4_i64(gen_gvec128_4_i64_fn fn, uint8_t d, uint8_t a,
     tcg_temp_free_i64(cl);
 }
 
-static void gen_gvec_dupi(uint8_t es, uint8_t reg, uint64_t c)
-{
-    switch (es) {
-    case ES_8:
-        tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, c);
-        break;
-    case ES_16:
-        tcg_gen_gvec_dup16i(vec_full_reg_offset(reg), 16, 16, c);
-        break;
-    case ES_32:
-        tcg_gen_gvec_dup32i(vec_full_reg_offset(reg), 16, 16, c);
-        break;
-    case ES_64:
-        gen_gvec_dup64i(reg, c);
-        break;
-    default:
-        g_assert_not_reached();
-    }
-}
-
-static void zero_vec(uint8_t reg)
-{
-    tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, 0);
-}
-
 static void gen_addi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
                           uint64_t b)
 {
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o)
          * Masks for both 64 bit elements of the vector are the same.
          * Trust tcg to produce a good constant loading.
          */
-        gen_gvec_dup64i(get_field(s, v1),
-                        generate_byte_mask(i2 & 0xff));
+        gen_gvec_dup_imm(ES_64, get_field(s, v1),
+                         generate_byte_mask(i2 & 0xff));
     } else {
         TCGv_i64 t = tcg_temp_new_i64();
 
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vgm(DisasContext *s, DisasOps *o)
         }
     }
 
-    gen_gvec_dupi(es, get_field(s, v1), mask);
+    gen_gvec_dup_imm(es, get_field(s, v1), mask);
     return DISAS_NEXT;
 }
 
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vllez(DisasContext *s, DisasOps *o)
 
     t = tcg_temp_new_i64();
     tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TE | es);
-    zero_vec(get_field(s, v1));
+    gen_gvec_dup_imm(es, get_field(s, v1), 0);
     write_vec_element_i64(t, get_field(s, v1), enr, es);
     tcg_temp_free_i64(t);
     return DISAS_NEXT;
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o)
         return DISAS_NORETURN;
     }
 
-    gen_gvec_dupi(es, get_field(s, v1), data);
+    gen_gvec_dup_imm(es, get_field(s, v1), data);
     return DISAS_NEXT;
 }
 
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vcksm(DisasContext *s, DisasOps *o)
         read_vec_element_i32(tmp, get_field(s, v2), i, ES_32);
         tcg_gen_add2_i32(tmp, sum, sum, sum, tmp, tmp);
     }
-    zero_vec(get_field(s, v1));
+    gen_gvec_dup_imm(ES_32, get_field(s, v1), 0);
    write_vec_element_i32(sum, get_field(s, v1), 1, ES_32);

    tcg_temp_free_i32(tmp);
--
2.20.1
For the benefit of compatibility of function pointer types,
we have standardized on int32_t and int64_t as the integral
argument to tcg expanders.

We converted most of them in 474b2e8f0f7, but missed the rotates.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-op.h |  8 ++++----
 tcg/tcg-op.c         | 16 ++++++++--------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2);
 void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg);
 void tcg_gen_ctpop_i32(TCGv_i32 a1, TCGv_i32 a2);
 void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
-void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
+void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
 void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
-void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
+void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
 void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
                          unsigned int ofs, unsigned int len);
 void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2);
 void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg);
 void tcg_gen_ctpop_i64(TCGv_i64 a1, TCGv_i64 a2);
 void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
-void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
+void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
 void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
-void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
+void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
 void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
                          unsigned int ofs, unsigned int len);
 void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
     }
 }
 
-void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
-    tcg_debug_assert(arg2 < 32);
+    tcg_debug_assert(arg2 >= 0 && arg2 < 32);
     /* some cases can be optimized here */
     if (arg2 == 0) {
         tcg_gen_mov_i32(ret, arg1);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
     }
 }
 
-void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
-    tcg_debug_assert(arg2 < 32);
+    tcg_debug_assert(arg2 >= 0 && arg2 < 32);
     /* some cases can be optimized here */
     if (arg2 == 0) {
         tcg_gen_mov_i32(ret, arg1);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
     }
 }
 
-void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 {
-    tcg_debug_assert(arg2 < 64);
+    tcg_debug_assert(arg2 >= 0 && arg2 < 64);
     /* some cases can be optimized here */
     if (arg2 == 0) {
         tcg_gen_mov_i64(ret, arg1);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
     }
 }
 
-void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 {
-    tcg_debug_assert(arg2 < 64);
+    tcg_debug_assert(arg2 >= 0 && arg2 < 64);
     /* some cases can be optimized here */
     if (arg2 == 0) {
         tcg_gen_mov_i64(ret, arg1);
--
2.20.1

From: Kele Huang <kele.hwang@gmail.com>

The addrl used to compare with the SoftTLB entry should be sign-extended
in the common case; otherwise the SoftTLB comparison constantly fails
for an addrl above 0x80000000 when emulating a 32-bit guest on a
64-bit host.

This is an important performance bug fix.  The Spec2000 gzip rate
increases from ~45 to ~140 on Loongson 3A4000 (a MIPS-compatible
platform).

Signed-off-by: Kele Huang <kele.hwang@gmail.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20210401100457.191458-1-kele.hwang@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.c.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
        load the tlb addend for the fast path.  */
     tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
     }
-    tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
 
     /* Zero extend a 32-bit guest address for a 64-bit host. */
     if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
         tcg_out_ext32u(s, base, addrl);
         addrl = base;
     }
+    tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
 
     label_ptr[0] = s->code_ptr;
     tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
--
2.25.1
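The sign-extension point is easy to reproduce outside QEMU.  A minimal stand-alone sketch (not part of the series) of why the two 64-bit representations of a high 32-bit guest address can never compare equal:

/* MIPS64 keeps 32-bit values sign-extended in 64-bit registers, so a
 * comparator held sign-extended never matches the same guest address
 * held zero-extended once it is >= 0x80000000. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t guest_addr = 0x80001000;             /* example address >= 0x80000000 */
    int64_t sign_extended = (int32_t)guest_addr;  /* 0xffffffff80001000 */
    uint64_t zero_extended = guest_addr;          /* 0x0000000080001000 */

    /* Prints 0: the equality test used for the TLB hit check fails. */
    printf("%d\n", (uint64_t)sign_extended == zero_extended);
    return 0;
}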
We can now unify the implementation of the 3 VSPLTI instructions.

Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/ppc/translate/vmx-impl.inc.c | 32 ++++++++++++++++-------------
 target/ppc/translate/vsx-impl.inc.c |  2 +-
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -XXX,XX +XXX,XX @@ GEN_VXRFORM_DUAL(vcmpbfp, PPC_ALTIVEC, PPC_NONE, \
 GEN_VXRFORM_DUAL(vcmpgtfp, PPC_ALTIVEC, PPC_NONE, \
                  vcmpgtud, PPC_NONE, PPC2_ALTIVEC_207)
 
-#define GEN_VXFORM_DUPI(name, tcg_op, opc2, opc3) \
-static void glue(gen_, name)(DisasContext *ctx) \
-    { \
-        int simm; \
-        if (unlikely(!ctx->altivec_enabled)) { \
-            gen_exception(ctx, POWERPC_EXCP_VPU); \
-            return; \
-        } \
-        simm = SIMM5(ctx->opcode); \
-        tcg_op(avr_full_offset(rD(ctx->opcode)), 16, 16, simm); \
+static void gen_vsplti(DisasContext *ctx, int vece)
+{
+    int simm;
+
+    if (unlikely(!ctx->altivec_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_VPU);
+        return;
     }
 
-GEN_VXFORM_DUPI(vspltisb, tcg_gen_gvec_dup8i, 6, 12);
-GEN_VXFORM_DUPI(vspltish, tcg_gen_gvec_dup16i, 6, 13);
-GEN_VXFORM_DUPI(vspltisw, tcg_gen_gvec_dup32i, 6, 14);
+    simm = SIMM5(ctx->opcode);
+    tcg_gen_gvec_dup_imm(vece, avr_full_offset(rD(ctx->opcode)), 16, 16, simm);
+}
+
+#define GEN_VXFORM_VSPLTI(name, vece, opc2, opc3) \
+static void glue(gen_, name)(DisasContext *ctx) { gen_vsplti(ctx, vece); }
+
+GEN_VXFORM_VSPLTI(vspltisb, MO_8, 6, 12);
+GEN_VXFORM_VSPLTI(vspltish, MO_16, 6, 13);
+GEN_VXFORM_VSPLTI(vspltisw, MO_32, 6, 14);
 
 #define GEN_VXFORM_NOA(name, opc2, opc3) \
 static void glue(gen_, name)(DisasContext *ctx) \
@@ -XXX,XX +XXX,XX @@ GEN_VXFORM_DUAL(vsldoi, PPC_ALTIVEC, PPC_NONE,
 #undef GEN_VXRFORM_DUAL
 #undef GEN_VXRFORM1
 #undef GEN_VXRFORM
-#undef GEN_VXFORM_DUPI
+#undef GEN_VXFORM_VSPLTI
 #undef GEN_VXFORM_NOA
 #undef GEN_VXFORM_UIMM
 #undef GEN_VAFORM_PAIRED
diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/target/ppc/translate/vsx-impl.inc.c
+++ b/target/ppc/translate/vsx-impl.inc.c
@@ -XXX,XX +XXX,XX @@ static void gen_xxspltib(DisasContext *ctx)
             return;
         }
     }
-    tcg_gen_gvec_dup8i(vsr_full_offset(rt), 16, 16, uim8);
+    tcg_gen_gvec_dup_imm(MO_8, vsr_full_offset(rt), 16, 16, uim8);
 }
 
 static void gen_xxsldwi(DisasContext *ctx)
--
2.20.1

From: Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru>

This patch handles icount mode for the timer read/write instructions,
because gen_io_start must be called in such cases.

Signed-off-by: Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <161700373035.1135822.16451510827008616793.stgit@pasha-ThinkPad-X280>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/alpha/translate.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/target/alpha/translate.c b/target/alpha/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/alpha/translate.c
+++ b/target/alpha/translate.c
@@ -XXX,XX +XXX,XX @@ static DisasJumpType gen_mfpr(DisasContext *ctx, TCGv va, int regno)
     case 249: /* VMTIME */
         helper = gen_helper_get_vmtime;
     do_helper:
-        if (icount_enabled()) {
+        if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
             gen_io_start();
             helper(va);
             return DISAS_PC_STALE;
@@ -XXX,XX +XXX,XX @@ static DisasJumpType gen_mfpr(DisasContext *ctx, TCGv va, int regno)
 static DisasJumpType gen_mtpr(DisasContext *ctx, TCGv vb, int regno)
 {
     int data;
+    DisasJumpType ret = DISAS_NEXT;
 
     switch (regno) {
     case 255:
@@ -XXX,XX +XXX,XX @@ static DisasJumpType gen_mtpr(DisasContext *ctx, TCGv vb, int regno)
 
     case 251:
         /* ALARM */
+        if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
+            gen_io_start();
+            ret = DISAS_PC_STALE;
+        }
         gen_helper_set_alarm(cpu_env, vb);
         break;
 
@@ -XXX,XX +XXX,XX @@ static DisasJumpType gen_mtpr(DisasContext *ctx, TCGv vb, int regno)
         break;
     }
 
-    return DISAS_NEXT;
+    return ret;
 }
 
 #endif /* !USER_ONLY*/
--
2.25.1
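The pattern applied here is the usual icount one; a condensed sketch, reconstructed from the diff above with ctx, vb and ret as in gen_mtpr:

    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
        gen_io_start();        /* a timer access counts as I/O under icount */
        ret = DISAS_PC_STALE;  /* end the TB so the instruction count stays exact */
    }
    gen_helper_set_alarm(cpu_env, vb);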
Deleted patch

In a few cases, we're able to remove some manual replication.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-a64.c | 10 +++++-----
 target/arm/translate-sve.c | 12 +++++-------
 target/arm/translate.c     |  9 ++++++---
 3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static void clear_vec_high(DisasContext *s, bool is_q, int rd)
         tcg_temp_free_i64(tcg_zero);
     }
     if (vsz > 16) {
-        tcg_gen_gvec_dup8i(ofs + 16, vsz - 16, vsz - 16, 0);
+        tcg_gen_gvec_dup_imm(MO_64, ofs + 16, vsz - 16, vsz - 16, 0);
     }
 }
 
@@ -XXX,XX +XXX,XX @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
 
     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
         /* MOVI or MVNI, with MVNI negation handled above. */
-        tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), is_q ? 16 : 8,
-                            vec_full_reg_size(s), imm);
+        tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
+                             vec_full_reg_size(s), imm);
     } else {
         /* ORR or BIC, with BIC negation to AND handled above. */
         if (is_neg) {
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
     if (is_u) {
         if (shift == 8 << size) {
             /* Shift count the same size as element size produces zero. */
-            tcg_gen_gvec_dup8i(vec_full_reg_offset(s, rd),
-                               is_q ? 16 : 8, vec_full_reg_size(s), 0);
+            tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
+                                 is_q ? 16 : 8, vec_full_reg_size(s), 0);
         } else {
             gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size);
         }
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -XXX,XX +XXX,XX @@ static bool do_mov_z(DisasContext *s, int rd, int rn)
 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
 {
     unsigned vsz = vec_full_reg_size(s);
-    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
+    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
 }
 
 /* Invoke a vector expander on two Pregs.  */
@@ -XXX,XX +XXX,XX @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
         unsigned oprsz = size_for_gvec(setsz / 8);
 
         if (oprsz * 8 == setsz) {
-            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
+            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
             goto done;
         }
     }
@@ -XXX,XX +XXX,XX @@ static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
             unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
             tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
         } else {
-            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
+            tcg_gen_gvec_dup_imm(esz, dofs, vsz, vsz, 0);
         }
     }
     return true;
@@ -XXX,XX +XXX,XX @@ static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
 
         /* Decode the VFP immediate.  */
         imm = vfp_expand_imm(a->esz, a->imm);
-        imm = dup_const(a->esz, imm);
-
-        tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
+        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
     }
     return true;
 }
@@ -XXX,XX +XXX,XX @@ static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
         unsigned vsz = vec_full_reg_size(s);
         int dofs = vec_full_reg_offset(s, a->rd);
 
-        tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
+        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
     }
     return true;
 }
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                                   MIN(shift, (8 << size) - 1),
                                   vec_size, vec_size);
             } else if (shift >= 8 << size) {
-                tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
+                tcg_gen_gvec_dup_imm(MO_8, rd_ofs, vec_size,
+                                     vec_size, 0);
             } else {
                 tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
                                   vec_size, vec_size);
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                  * architecturally valid and results in zero.
                  */
                 if (shift >= 8 << size) {
-                    tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
+                    tcg_gen_gvec_dup_imm(size, rd_ofs,
+                                         vec_size, vec_size, 0);
                 } else {
                     tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
                                       vec_size, vec_size);
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                 }
                 tcg_temp_free_i64(t64);
             } else {
-                tcg_gen_gvec_dup32i(reg_ofs, vec_size, vec_size, imm);
+                tcg_gen_gvec_dup_imm(MO_32, reg_ofs, vec_size,
+                                     vec_size, imm);
             }
         }
     }
--
2.20.1
Deleted patch

Replace the outgoing interface.

Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg-op-gvec.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_xor(unsigned vece, uint32_t dofs, uint32_t aofs,
     };
 
     if (aofs == bofs) {
-        tcg_gen_gvec_dup8i(dofs, oprsz, maxsz, 0);
+        tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, 0);
     } else {
         tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
     }
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_andc(unsigned vece, uint32_t dofs, uint32_t aofs,
     };
 
     if (aofs == bofs) {
-        tcg_gen_gvec_dup8i(dofs, oprsz, maxsz, 0);
+        tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, 0);
     } else {
         tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
     }
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_orc(unsigned vece, uint32_t dofs, uint32_t aofs,
     };
 
     if (aofs == bofs) {
-        tcg_gen_gvec_dup8i(dofs, oprsz, maxsz, -1);
+        tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, -1);
     } else {
         tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
     }
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_eqv(unsigned vece, uint32_t dofs, uint32_t aofs,
     };
 
     if (aofs == bofs) {
-        tcg_gen_gvec_dup8i(dofs, oprsz, maxsz, -1);
+        tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, -1);
     } else {
         tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
     }
--
2.20.1
Deleted patch

These interfaces are now unused.

Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-op-gvec.h |  5 -----
 tcg/tcg-op-gvec.c         | 28 ----------------------------
 2 files changed, 33 deletions(-)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t s,
 void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t s,
                           uint32_t m, TCGv_i64);
 
-void tcg_gen_gvec_dup8i(uint32_t dofs, uint32_t s, uint32_t m, uint8_t x);
-void tcg_gen_gvec_dup16i(uint32_t dofs, uint32_t s, uint32_t m, uint16_t x);
-void tcg_gen_gvec_dup32i(uint32_t dofs, uint32_t s, uint32_t m, uint32_t x);
-void tcg_gen_gvec_dup64i(uint32_t dofs, uint32_t s, uint32_t m, uint64_t x);
-
 void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t shift, uint32_t oprsz, uint32_t maxsz);
 void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
     }
 }
 
-void tcg_gen_gvec_dup64i(uint32_t dofs, uint32_t oprsz,
-                         uint32_t maxsz, uint64_t x)
-{
-    check_size_align(oprsz, maxsz, dofs);
-    do_dup(MO_64, dofs, oprsz, maxsz, NULL, NULL, x);
-}
-
-void tcg_gen_gvec_dup32i(uint32_t dofs, uint32_t oprsz,
-                         uint32_t maxsz, uint32_t x)
-{
-    check_size_align(oprsz, maxsz, dofs);
-    do_dup(MO_32, dofs, oprsz, maxsz, NULL, NULL, x);
-}
-
-void tcg_gen_gvec_dup16i(uint32_t dofs, uint32_t oprsz,
-                         uint32_t maxsz, uint16_t x)
-{
-    check_size_align(oprsz, maxsz, dofs);
-    do_dup(MO_16, dofs, oprsz, maxsz, NULL, NULL, x);
-}
-
-void tcg_gen_gvec_dup8i(uint32_t dofs, uint32_t oprsz,
-                        uint32_t maxsz, uint8_t x)
-{
-    check_size_align(oprsz, maxsz, dofs);
-    do_dup(MO_8, dofs, oprsz, maxsz, NULL, NULL, x);
-}
-
 void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t oprsz,
                           uint32_t maxsz, uint64_t x)
 {
--
2.20.1
Deleted patch

For use when a target needs to pass a configure-specific
target_ulong value to duplicate.

Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-op-gvec.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t s,
 void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t s,
                           uint32_t m, TCGv_i64);
 
+#if TARGET_LONG_BITS == 64
+# define tcg_gen_gvec_dup_tl tcg_gen_gvec_dup_i64
+#else
+# define tcg_gen_gvec_dup_tl tcg_gen_gvec_dup_i32
+#endif
+
 void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t shift, uint32_t oprsz, uint32_t maxsz);
 void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
--
2.20.1
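A sketch of the intended use (illustrative, not from the series; the helper name is hypothetical): a front end that holds a value in a TCGv, whose width tracks TARGET_LONG_BITS, can broadcast it without a per-configuration ifdef.

/* Hypothetical front-end helper: splat the target_ulong held in `val`
 * into every element of the vector at offset `dofs`.  TCGv is TCGv_i32
 * or TCGv_i64 depending on TARGET_LONG_BITS, and tcg_gen_gvec_dup_tl
 * resolves to the matching dup_i32/dup_i64 expander. */
static void gen_splat_tl(unsigned vece, uint32_t dofs, uint32_t oprsz,
                         uint32_t maxsz, TCGv val)
{
    tcg_gen_gvec_dup_tl(vece, dofs, oprsz, maxsz, val);
}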
Deleted patch

Better handling of non-power-of-2 tails as seen with Arm 8-byte
vector operations.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg-op-gvec.c | 82 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 63 insertions(+), 19 deletions(-)

diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
    in units of LNSZ.  This limits the expansion of inline code.  */
 static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz)
 {
-    if (oprsz % lnsz == 0) {
-        uint32_t lnct = oprsz / lnsz;
-        return lnct >= 1 && lnct <= MAX_UNROLL;
+    uint32_t q, r;
+
+    if (oprsz < lnsz) {
+        return false;
     }
-    return false;
+
+    q = oprsz / lnsz;
+    r = oprsz % lnsz;
+    tcg_debug_assert((r & 7) == 0);
+
+    if (lnsz < 16) {
+        /* For sizes below 16, accept no remainder. */
+        if (r != 0) {
+            return false;
+        }
+    } else {
+        /*
+         * Recall that ARM SVE allows vector sizes that are not a
+         * power of 2, but always a multiple of 16.  The intent is
+         * that e.g. size == 80 would be expanded with 2x32 + 1x16.
+         * In addition, expand_clr needs to handle a multiple of 8.
+         * Thus we can handle the tail with one more operation per
+         * diminishing power of 2.
+         */
+        q += ctpop32(r);
+    }
+
+    return q <= MAX_UNROLL;
 }
 
 static void expand_clr(uint32_t dofs, uint32_t maxsz);
@@ -XXX,XX +XXX,XX @@ static void gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in)
 static TCGType choose_vector_type(const TCGOpcode *list, unsigned vece,
                                   uint32_t size, bool prefer_i64)
 {
-    if (TCG_TARGET_HAS_v256 && check_size_impl(size, 32)) {
-        /*
-         * Recall that ARM SVE allows vector sizes that are not a
-         * power of 2, but always a multiple of 16.  The intent is
-         * that e.g. size == 80 would be expanded with 2x32 + 1x16.
-         * It is hard to imagine a case in which v256 is supported
-         * but v128 is not, but check anyway.
-         */
-        if (tcg_can_emit_vecop_list(list, TCG_TYPE_V256, vece)
-            && (size % 32 == 0
-                || tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece))) {
-            return TCG_TYPE_V256;
-        }
+    /*
+     * Recall that ARM SVE allows vector sizes that are not a
+     * power of 2, but always a multiple of 16.  The intent is
+     * that e.g. size == 80 would be expanded with 2x32 + 1x16.
+     * It is hard to imagine a case in which v256 is supported
+     * but v128 is not, but check anyway.
+     * In addition, expand_clr needs to handle a multiple of 8.
+     */
+    if (TCG_TARGET_HAS_v256 &&
+        check_size_impl(size, 32) &&
+        tcg_can_emit_vecop_list(list, TCG_TYPE_V256, vece) &&
+        (!(size & 16) ||
+         (TCG_TARGET_HAS_v128 &&
+          tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece))) &&
+        (!(size & 8) ||
+         (TCG_TARGET_HAS_v64 &&
+          tcg_can_emit_vecop_list(list, TCG_TYPE_V64, vece)))) {
+        return TCG_TYPE_V256;
     }
-    if (TCG_TARGET_HAS_v128 && check_size_impl(size, 16)
-        && tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece)) {
+    if (TCG_TARGET_HAS_v128 &&
+        check_size_impl(size, 16) &&
+        tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece) &&
+        (!(size & 8) ||
+         (TCG_TARGET_HAS_v64 &&
+          tcg_can_emit_vecop_list(list, TCG_TYPE_V64, vece)))) {
         return TCG_TYPE_V128;
     }
     if (TCG_TARGET_HAS_v64 && !prefer_i64 && check_size_impl(size, 8)
@@ -XXX,XX +XXX,XX @@ static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz,
 {
     uint32_t i = 0;
 
+    tcg_debug_assert(oprsz >= 8);
+
+    /*
+     * This may be expand_clr for the tail of an operation, e.g.
+     * oprsz == 8 && maxsz == 64.  The first 8 bytes of this store
+     * are misaligned wrt the maximum vector size, so do that first.
+     */
+    if (dofs & 8) {
+        tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64);
+        i += 8;
+    }
+
     switch (type) {
     case TCG_TYPE_V256:
         /*
--
2.20.1
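Worked numbers for the new check_size_impl() accounting (editorial example; MAX_UNROLL is assumed here to be 4, as defined in tcg/tcg-op-gvec.c):

/* oprsz = 80, lnsz = 32:  q = 2, r = 16, ctpop32(16) = 1 -> 3 ops (2x32 + 1x16), accepted
 * oprsz = 24, lnsz = 16:  q = 1, r = 8,  ctpop32(8)  = 1 -> 2 ops (1x16 + 1x8),  accepted
 * oprsz = 24, lnsz = 8:   q = 3, r = 0                   -> 3 ops, as before
 * oprsz = 80, lnsz = 8:   q = 10                         -> rejected, exceeds MAX_UNROLL
 */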
Deleted patch

We have this same parameter for GVecGen2i, GVecGen3,
and GVecGen3i.  This will make some SVE2 insns easier
to parameterize.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-op-gvec.h |  2 ++
 tcg/tcg-op-gvec.c         | 45 ++++++++++++++++++++++++++++-----------
 2 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -XXX,XX +XXX,XX @@ typedef struct {
     uint8_t vece;
     /* Prefer i64 to v64.  */
     bool prefer_i64;
+    /* Load dest as a 2nd source operand.  */
+    bool load_dest;
 } GVecGen2;
 
 typedef struct {
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -XXX,XX +XXX,XX @@ static void expand_clr(uint32_t dofs, uint32_t maxsz)
 
 /* Expand OPSZ bytes worth of two-operand operations using i32 elements.  */
 static void expand_2_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
-                         void (*fni)(TCGv_i32, TCGv_i32))
+                         bool load_dest, void (*fni)(TCGv_i32, TCGv_i32))
 {
     TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t1 = tcg_temp_new_i32();
     uint32_t i;
 
     for (i = 0; i < oprsz; i += 4) {
         tcg_gen_ld_i32(t0, cpu_env, aofs + i);
-        fni(t0, t0);
-        tcg_gen_st_i32(t0, cpu_env, dofs + i);
+        if (load_dest) {
+            tcg_gen_ld_i32(t1, cpu_env, dofs + i);
+        }
+        fni(t1, t0);
+        tcg_gen_st_i32(t1, cpu_env, dofs + i);
     }
     tcg_temp_free_i32(t0);
+    tcg_temp_free_i32(t1);
 }
 
 static void expand_2i_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
@@ -XXX,XX +XXX,XX @@ static void expand_4_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
 
 /* Expand OPSZ bytes worth of two-operand operations using i64 elements.  */
 static void expand_2_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
-                         void (*fni)(TCGv_i64, TCGv_i64))
+                         bool load_dest, void (*fni)(TCGv_i64, TCGv_i64))
 {
     TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
     uint32_t i;
 
     for (i = 0; i < oprsz; i += 8) {
         tcg_gen_ld_i64(t0, cpu_env, aofs + i);
-        fni(t0, t0);
-        tcg_gen_st_i64(t0, cpu_env, dofs + i);
+        if (load_dest) {
+            tcg_gen_ld_i64(t1, cpu_env, dofs + i);
+        }
+        fni(t1, t0);
+        tcg_gen_st_i64(t1, cpu_env, dofs + i);
     }
     tcg_temp_free_i64(t0);
+    tcg_temp_free_i64(t1);
 }
 
 static void expand_2i_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
@@ -XXX,XX +XXX,XX @@ static void expand_4_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
 /* Expand OPSZ bytes worth of two-operand operations using host vectors.  */
 static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
                          uint32_t oprsz, uint32_t tysz, TCGType type,
+                         bool load_dest,
                          void (*fni)(unsigned, TCGv_vec, TCGv_vec))
 {
     TCGv_vec t0 = tcg_temp_new_vec(type);
+    TCGv_vec t1 = tcg_temp_new_vec(type);
     uint32_t i;
 
     for (i = 0; i < oprsz; i += tysz) {
         tcg_gen_ld_vec(t0, cpu_env, aofs + i);
-        fni(vece, t0, t0);
-        tcg_gen_st_vec(t0, cpu_env, dofs + i);
+        if (load_dest) {
+            tcg_gen_ld_vec(t1, cpu_env, dofs + i);
+        }
+        fni(vece, t1, t0);
+        tcg_gen_st_vec(t1, cpu_env, dofs + i);
     }
     tcg_temp_free_vec(t0);
+    tcg_temp_free_vec(t1);
 }
 
 /* Expand OPSZ bytes worth of two-vector operands and an immediate operand
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
          * that e.g. size == 80 would be expanded with 2x32 + 1x16.
          */
         some = QEMU_ALIGN_DOWN(oprsz, 32);
-        expand_2_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, g->fniv);
+        expand_2_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256,
+                     g->load_dest, g->fniv);
         if (some == oprsz) {
             break;
         }
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
         maxsz -= some;
         /* fallthru */
     case TCG_TYPE_V128:
-        expand_2_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, g->fniv);
+        expand_2_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128,
+                     g->load_dest, g->fniv);
         break;
     case TCG_TYPE_V64:
-        expand_2_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, g->fniv);
+        expand_2_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64,
+                     g->load_dest, g->fniv);
         break;
 
     case 0:
         if (g->fni8 && check_size_impl(oprsz, 8)) {
-            expand_2_i64(dofs, aofs, oprsz, g->fni8);
+            expand_2_i64(dofs, aofs, oprsz, g->load_dest, g->fni8);
         } else if (g->fni4 && check_size_impl(oprsz, 4)) {
-            expand_2_i32(dofs, aofs, oprsz, g->fni4);
+            expand_2_i32(dofs, aofs, oprsz, g->load_dest, g->fni4);
         } else {
             assert(g->fno != NULL);
             tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, g->data, g->fno);
--
2.20.1
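To show how a front end might use the new flag (a hypothetical sketch, not from the series; gen_acc_i64 and acc_op are made-up names): with .load_dest = true the expander feeds the current destination element back in as an input, which is what accumulate-style operations need.

/* "d += a" over a whole vector, expanded 64 bits at a time.  A real
 * definition would normally also supply .fniv/.fno variants. */
static void gen_acc_i64(TCGv_i64 d, TCGv_i64 a)
{
    tcg_gen_add_i64(d, d, a);
}

static const GVecGen2 acc_op = {
    .fni8 = gen_acc_i64,   /* per-64-bit-lane expander */
    .vece = MO_64,
    .load_dest = true,     /* pass the old dest value in as an operand */
};

/* Usage: tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &acc_op); */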