1
The following changes since commit a36d64f43325fa503075cc9408ddabb69b32f829:
1
Pretty small still, but there are two patches that ought
2
to get backported to stable, so no point in delaying.
2
3
3
Merge remote-tracking branch 'remotes/stsquad/tags/pull-testing-and-gdbstub-060520-1' into staging (2020-05-06 14:06:00 +0100)
4
r~
5
6
The following changes since commit a5ba0a7e4e150d1350a041f0d0ef9ca6c8d7c307:
7
8
Merge tag 'pull-aspeed-20241211' of https://github.com/legoater/qemu into staging (2024-12-11 15:16:47 +0000)
4
9
5
are available in the Git repository at:
10
are available in the Git repository at:
6
11
7
https://github.com/rth7680/qemu.git tags/pull-tcg-20200506
12
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20241212
8
13
9
for you to fetch changes up to 07dada0336a83002dfa8673a9220a88e13d9a45c:
14
for you to fetch changes up to 7ac87b14a92234b6a89b701b4043ad6cf8bdcccf:
10
15
11
tcg: Fix integral argument type to tcg_gen_rot[rl]i_i{32,64} (2020-05-06 09:25:10 -0700)
16
target/sparc: Use memcpy() and remove memcpy32() (2024-12-12 14:28:38 -0600)
12
17
13
----------------------------------------------------------------
18
----------------------------------------------------------------
14
Add tcg_gen_gvec_dup_imm
19
tcg: Reset free_temps before tcg_optimize
15
Misc tcg patches
20
tcg/riscv: Fix StoreStore barrier generation
21
include/exec: Introduce fpst alias in helper-head.h.inc
22
target/sparc: Use memcpy() and remove memcpy32()
16
23
17
----------------------------------------------------------------
24
----------------------------------------------------------------
18
Richard Henderson (10):
25
Philippe Mathieu-Daudé (1):
19
tcg: Add tcg_gen_gvec_dup_imm
26
target/sparc: Use memcpy() and remove memcpy32()
20
target/s390x: Use tcg_gen_gvec_dup_imm
21
target/ppc: Use tcg_gen_gvec_dup_imm
22
target/arm: Use tcg_gen_gvec_dup_imm
23
tcg: Use tcg_gen_gvec_dup_imm in logical simplifications
24
tcg: Remove tcg_gen_gvec_dup{8,16,32,64}i
25
tcg: Add tcg_gen_gvec_dup_tl
26
tcg: Improve vector tail clearing
27
tcg: Add load_dest parameter to GVecGen2
28
tcg: Fix integral argument type to tcg_gen_rot[rl]i_i{32,64}
29
27
30
include/tcg/tcg-op-gvec.h | 13 ++-
28
Richard Henderson (2):
31
include/tcg/tcg-op.h | 8 +-
29
tcg: Reset free_temps before tcg_optimize
32
target/arm/translate-a64.c | 10 +--
30
include/exec: Introduce fpst alias in helper-head.h.inc
33
target/arm/translate-sve.c | 12 ++-
34
target/arm/translate.c | 9 +-
35
target/ppc/translate/vmx-impl.inc.c | 32 +++----
36
target/ppc/translate/vsx-impl.inc.c | 2 +-
37
target/s390x/translate_vx.inc.c | 41 ++-------
38
tcg/tcg-op-gvec.c | 162 +++++++++++++++++++++++-------------
39
tcg/tcg-op.c | 16 ++--
40
10 files changed, 166 insertions(+), 139 deletions(-)
41
31
32
Roman Artemev (1):
33
tcg/riscv: Fix StoreStore barrier generation
34
35
include/tcg/tcg-temp-internal.h | 6 ++++++
36
accel/tcg/plugin-gen.c | 2 +-
37
target/sparc/win_helper.c | 26 ++++++++------------------
38
tcg/tcg.c | 5 ++++-
39
include/exec/helper-head.h.inc | 3 +++
40
tcg/riscv/tcg-target.c.inc | 2 +-
41
6 files changed, 23 insertions(+), 21 deletions(-)
42
diff view generated by jsdifflib
1
Add a version of tcg_gen_dup_* that takes both immediate and
1
When allocating new temps during tcg_optimize, do not re-use
2
a vector element size operand. This will replace the set of
2
any EBB temps that were used within the TB. We do not have
3
tcg_gen_gvec_dup{8,16,32,64}i functions that encode the element
3
any idea of the span of the TB in which the temp was live.
4
size within the function name.
5
4
6
Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
5
Introduce tcg_temp_ebb_reset_freed and use before tcg_optimize,
7
Reviewed-by: David Hildenbrand <david@redhat.com>
6
as well as replacing the equivalent in plugin_gen_inject and
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
tcg_func_start.
8
9
Cc: qemu-stable@nongnu.org
10
Fixes: fb04ab7ddd8 ("tcg/optimize: Lower TCG_COND_TST{EQ,NE} if unsupported")
11
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2711
12
Reported-by: wannacu <wannacu2049@gmail.com>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
15
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
---
16
---
11
include/tcg/tcg-op-gvec.h | 2 ++
17
include/tcg/tcg-temp-internal.h | 6 ++++++
12
tcg/tcg-op-gvec.c | 7 +++++++
18
accel/tcg/plugin-gen.c | 2 +-
13
2 files changed, 9 insertions(+)
19
tcg/tcg.c | 5 ++++-
20
3 files changed, 11 insertions(+), 2 deletions(-)
14
21
15
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
22
diff --git a/include/tcg/tcg-temp-internal.h b/include/tcg/tcg-temp-internal.h
16
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
17
--- a/include/tcg/tcg-op-gvec.h
24
--- a/include/tcg/tcg-temp-internal.h
18
+++ b/include/tcg/tcg-op-gvec.h
25
+++ b/include/tcg/tcg-temp-internal.h
19
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,
26
@@ -XXX,XX +XXX,XX @@ TCGv_i64 tcg_temp_ebb_new_i64(void);
20
27
TCGv_ptr tcg_temp_ebb_new_ptr(void);
21
void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
28
TCGv_i128 tcg_temp_ebb_new_i128(void);
22
uint32_t s, uint32_t m);
29
23
+void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t s,
30
+/* Forget all freed EBB temps, so that new allocations produce new temps. */
24
+ uint32_t m, uint64_t imm);
31
+static inline void tcg_temp_ebb_reset_freed(TCGContext *s)
25
void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t s,
26
uint32_t m, TCGv_i32);
27
void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t s,
28
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/tcg/tcg-op-gvec.c
31
+++ b/tcg/tcg-op-gvec.c
32
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup8i(uint32_t dofs, uint32_t oprsz,
33
do_dup(MO_8, dofs, oprsz, maxsz, NULL, NULL, x);
34
}
35
36
+void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t oprsz,
37
+ uint32_t maxsz, uint64_t x)
38
+{
32
+{
39
+ check_size_align(oprsz, maxsz, dofs);
33
+ memset(s->free_temps, 0, sizeof(s->free_temps));
40
+ do_dup(vece, dofs, oprsz, maxsz, NULL, NULL, x);
41
+}
34
+}
42
+
35
+
43
void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs,
36
#endif /* TCG_TEMP_FREE_H */
44
uint32_t oprsz, uint32_t maxsz)
37
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
45
{
38
index XXXXXXX..XXXXXXX 100644
39
--- a/accel/tcg/plugin-gen.c
40
+++ b/accel/tcg/plugin-gen.c
41
@@ -XXX,XX +XXX,XX @@ static void plugin_gen_inject(struct qemu_plugin_tb *plugin_tb)
42
* that might be live within the existing opcode stream.
43
* The simplest solution is to release them all and create new.
44
*/
45
- memset(tcg_ctx->free_temps, 0, sizeof(tcg_ctx->free_temps));
46
+ tcg_temp_ebb_reset_freed(tcg_ctx);
47
48
QTAILQ_FOREACH_SAFE(op, &tcg_ctx->ops, link, next) {
49
switch (op->opc) {
50
diff --git a/tcg/tcg.c b/tcg/tcg.c
51
index XXXXXXX..XXXXXXX 100644
52
--- a/tcg/tcg.c
53
+++ b/tcg/tcg.c
54
@@ -XXX,XX +XXX,XX @@ void tcg_func_start(TCGContext *s)
55
s->nb_temps = s->nb_globals;
56
57
/* No temps have been previously allocated for size or locality. */
58
- memset(s->free_temps, 0, sizeof(s->free_temps));
59
+ tcg_temp_ebb_reset_freed(s);
60
61
/* No constant temps have been previously allocated. */
62
for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
63
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
64
}
65
#endif
66
67
+ /* Do not reuse any EBB that may be allocated within the TB. */
68
+ tcg_temp_ebb_reset_freed(s);
69
+
70
tcg_optimize(s);
71
72
reachable_code_pass(s);
46
--
73
--
47
2.20.1
74
2.43.0
48
75
49
76
diff view generated by jsdifflib
Deleted patch
1
The gen_gvec_dupi switch is unnecessary with the new function.
2
Replace it with a local gen_gvec_dup_imm that takes care of the
3
register to offset conversion and length arguments.
4
1
5
Drop zero_vec and use gen_gvec_dup_imm with 0.
6
7
Reviewed-by: David Hildenbrand <david@redhat.com>
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
target/s390x/translate_vx.inc.c | 41 +++++++--------------------------
12
1 file changed, 8 insertions(+), 33 deletions(-)
13
14
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/s390x/translate_vx.inc.c
17
+++ b/target/s390x/translate_vx.inc.c
18
@@ -XXX,XX +XXX,XX @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
19
#define gen_gvec_mov(v1, v2) \
20
tcg_gen_gvec_mov(0, vec_full_reg_offset(v1), vec_full_reg_offset(v2), 16, \
21
16)
22
-#define gen_gvec_dup64i(v1, c) \
23
- tcg_gen_gvec_dup64i(vec_full_reg_offset(v1), 16, 16, c)
24
+#define gen_gvec_dup_imm(es, v1, c) \
25
+ tcg_gen_gvec_dup_imm(es, vec_full_reg_offset(v1), 16, 16, c);
26
#define gen_gvec_fn_2(fn, es, v1, v2) \
27
tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
28
16, 16)
29
@@ -XXX,XX +XXX,XX @@ static void gen_gvec128_4_i64(gen_gvec128_4_i64_fn fn, uint8_t d, uint8_t a,
30
tcg_temp_free_i64(cl);
31
}
32
33
-static void gen_gvec_dupi(uint8_t es, uint8_t reg, uint64_t c)
34
-{
35
- switch (es) {
36
- case ES_8:
37
- tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, c);
38
- break;
39
- case ES_16:
40
- tcg_gen_gvec_dup16i(vec_full_reg_offset(reg), 16, 16, c);
41
- break;
42
- case ES_32:
43
- tcg_gen_gvec_dup32i(vec_full_reg_offset(reg), 16, 16, c);
44
- break;
45
- case ES_64:
46
- gen_gvec_dup64i(reg, c);
47
- break;
48
- default:
49
- g_assert_not_reached();
50
- }
51
-}
52
-
53
-static void zero_vec(uint8_t reg)
54
-{
55
- tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, 0);
56
-}
57
-
58
static void gen_addi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah,
59
uint64_t b)
60
{
61
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o)
62
* Masks for both 64 bit elements of the vector are the same.
63
* Trust tcg to produce a good constant loading.
64
*/
65
- gen_gvec_dup64i(get_field(s, v1),
66
- generate_byte_mask(i2 & 0xff));
67
+ gen_gvec_dup_imm(ES_64, get_field(s, v1),
68
+ generate_byte_mask(i2 & 0xff));
69
} else {
70
TCGv_i64 t = tcg_temp_new_i64();
71
72
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vgm(DisasContext *s, DisasOps *o)
73
}
74
}
75
76
- gen_gvec_dupi(es, get_field(s, v1), mask);
77
+ gen_gvec_dup_imm(es, get_field(s, v1), mask);
78
return DISAS_NEXT;
79
}
80
81
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vllez(DisasContext *s, DisasOps *o)
82
83
t = tcg_temp_new_i64();
84
tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TE | es);
85
- zero_vec(get_field(s, v1));
86
+ gen_gvec_dup_imm(es, get_field(s, v1), 0);
87
write_vec_element_i64(t, get_field(s, v1), enr, es);
88
tcg_temp_free_i64(t);
89
return DISAS_NEXT;
90
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o)
91
return DISAS_NORETURN;
92
}
93
94
- gen_gvec_dupi(es, get_field(s, v1), data);
95
+ gen_gvec_dup_imm(es, get_field(s, v1), data);
96
return DISAS_NEXT;
97
}
98
99
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vcksm(DisasContext *s, DisasOps *o)
100
read_vec_element_i32(tmp, get_field(s, v2), i, ES_32);
101
tcg_gen_add2_i32(tmp, sum, sum, sum, tmp, tmp);
102
}
103
- zero_vec(get_field(s, v1));
104
+ gen_gvec_dup_imm(ES_32, get_field(s, v1), 0);
105
write_vec_element_i32(sum, get_field(s, v1), 1, ES_32);
106
107
tcg_temp_free_i32(tmp);
108
--
109
2.20.1
110
111
diff view generated by jsdifflib
Deleted patch
1
We can now unify the implementation of the 3 VSPLTI instructions.
2
1
3
Acked-by: David Gibson <david@gibson.dropbear.id.au>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
target/ppc/translate/vmx-impl.inc.c | 32 ++++++++++++++++-------------
7
target/ppc/translate/vsx-impl.inc.c | 2 +-
8
2 files changed, 19 insertions(+), 15 deletions(-)
9
10
diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/ppc/translate/vmx-impl.inc.c
13
+++ b/target/ppc/translate/vmx-impl.inc.c
14
@@ -XXX,XX +XXX,XX @@ GEN_VXRFORM_DUAL(vcmpbfp, PPC_ALTIVEC, PPC_NONE, \
15
GEN_VXRFORM_DUAL(vcmpgtfp, PPC_ALTIVEC, PPC_NONE, \
16
vcmpgtud, PPC_NONE, PPC2_ALTIVEC_207)
17
18
-#define GEN_VXFORM_DUPI(name, tcg_op, opc2, opc3) \
19
-static void glue(gen_, name)(DisasContext *ctx) \
20
- { \
21
- int simm; \
22
- if (unlikely(!ctx->altivec_enabled)) { \
23
- gen_exception(ctx, POWERPC_EXCP_VPU); \
24
- return; \
25
- } \
26
- simm = SIMM5(ctx->opcode); \
27
- tcg_op(avr_full_offset(rD(ctx->opcode)), 16, 16, simm); \
28
+static void gen_vsplti(DisasContext *ctx, int vece)
29
+{
30
+ int simm;
31
+
32
+ if (unlikely(!ctx->altivec_enabled)) {
33
+ gen_exception(ctx, POWERPC_EXCP_VPU);
34
+ return;
35
}
36
37
-GEN_VXFORM_DUPI(vspltisb, tcg_gen_gvec_dup8i, 6, 12);
38
-GEN_VXFORM_DUPI(vspltish, tcg_gen_gvec_dup16i, 6, 13);
39
-GEN_VXFORM_DUPI(vspltisw, tcg_gen_gvec_dup32i, 6, 14);
40
+ simm = SIMM5(ctx->opcode);
41
+ tcg_gen_gvec_dup_imm(vece, avr_full_offset(rD(ctx->opcode)), 16, 16, simm);
42
+}
43
+
44
+#define GEN_VXFORM_VSPLTI(name, vece, opc2, opc3) \
45
+static void glue(gen_, name)(DisasContext *ctx) { gen_vsplti(ctx, vece); }
46
+
47
+GEN_VXFORM_VSPLTI(vspltisb, MO_8, 6, 12);
48
+GEN_VXFORM_VSPLTI(vspltish, MO_16, 6, 13);
49
+GEN_VXFORM_VSPLTI(vspltisw, MO_32, 6, 14);
50
51
#define GEN_VXFORM_NOA(name, opc2, opc3) \
52
static void glue(gen_, name)(DisasContext *ctx) \
53
@@ -XXX,XX +XXX,XX @@ GEN_VXFORM_DUAL(vsldoi, PPC_ALTIVEC, PPC_NONE,
54
#undef GEN_VXRFORM_DUAL
55
#undef GEN_VXRFORM1
56
#undef GEN_VXRFORM
57
-#undef GEN_VXFORM_DUPI
58
+#undef GEN_VXFORM_VSPLTI
59
#undef GEN_VXFORM_NOA
60
#undef GEN_VXFORM_UIMM
61
#undef GEN_VAFORM_PAIRED
62
diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c
63
index XXXXXXX..XXXXXXX 100644
64
--- a/target/ppc/translate/vsx-impl.inc.c
65
+++ b/target/ppc/translate/vsx-impl.inc.c
66
@@ -XXX,XX +XXX,XX @@ static void gen_xxspltib(DisasContext *ctx)
67
return;
68
}
69
}
70
- tcg_gen_gvec_dup8i(vsr_full_offset(rt), 16, 16, uim8);
71
+ tcg_gen_gvec_dup_imm(MO_8, vsr_full_offset(rt), 16, 16, uim8);
72
}
73
74
static void gen_xxsldwi(DisasContext *ctx)
75
--
76
2.20.1
77
78
diff view generated by jsdifflib
1
For the benefit of compatibility of function pointer types,
1
From: Roman Artemev <roman.artemev@syntacore.com>
2
we have standardized on int32_t and int64_t as the integral
3
argument to tcg expanders.
4
2
5
We converted most of them in 474b2e8f0f7, but missed the rotates.
3
On RISC-V, a StoreStore barrier corresponds to
4
`fence w, w` not `fence r, r`
6
5
7
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Cc: qemu-stable@nongnu.org
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Fixes: efbea94c76b ("tcg/riscv: Add slowpath load and store instructions")
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Denis Tomashev <denis.tomashev@syntacore.com>
10
Signed-off-by: Roman Artemev <roman.artemev@syntacore.com>
11
Message-ID: <e2f2131e294a49e79959d4fa9ec02cf4@syntacore.com>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
13
---
11
include/tcg/tcg-op.h | 8 ++++----
14
tcg/riscv/tcg-target.c.inc | 2 +-
12
tcg/tcg-op.c | 16 ++++++++--------
15
1 file changed, 1 insertion(+), 1 deletion(-)
13
2 files changed, 12 insertions(+), 12 deletions(-)
14
16
15
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
17
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
16
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
17
--- a/include/tcg/tcg-op.h
19
--- a/tcg/riscv/tcg-target.c.inc
18
+++ b/include/tcg/tcg-op.h
20
+++ b/tcg/riscv/tcg-target.c.inc
19
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2);
21
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
20
void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg);
22
insn |= 0x02100000;
21
void tcg_gen_ctpop_i32(TCGv_i32 a1, TCGv_i32 a2);
22
void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
23
-void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
24
+void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
25
void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
26
-void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
27
+void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
28
void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
29
unsigned int ofs, unsigned int len);
30
void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
31
@@ -XXX,XX +XXX,XX @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2);
32
void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg);
33
void tcg_gen_ctpop_i64(TCGv_i64 a1, TCGv_i64 a2);
34
void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
35
-void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
36
+void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
37
void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
38
-void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
39
+void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
40
void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
41
unsigned int ofs, unsigned int len);
42
void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
43
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/tcg/tcg-op.c
46
+++ b/tcg/tcg-op.c
47
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
48
}
23
}
24
if (a0 & TCG_MO_ST_ST) {
25
- insn |= 0x02200000;
26
+ insn |= 0x01100000;
27
}
28
tcg_out32(s, insn);
49
}
29
}
50
51
-void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
52
+void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
53
{
54
- tcg_debug_assert(arg2 < 32);
55
+ tcg_debug_assert(arg2 >= 0 && arg2 < 32);
56
/* some cases can be optimized here */
57
if (arg2 == 0) {
58
tcg_gen_mov_i32(ret, arg1);
59
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
60
}
61
}
62
63
-void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
64
+void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
65
{
66
- tcg_debug_assert(arg2 < 32);
67
+ tcg_debug_assert(arg2 >= 0 && arg2 < 32);
68
/* some cases can be optimized here */
69
if (arg2 == 0) {
70
tcg_gen_mov_i32(ret, arg1);
71
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
72
}
73
}
74
75
-void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
76
+void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
77
{
78
- tcg_debug_assert(arg2 < 64);
79
+ tcg_debug_assert(arg2 >= 0 && arg2 < 64);
80
/* some cases can be optimized here */
81
if (arg2 == 0) {
82
tcg_gen_mov_i64(ret, arg1);
83
@@ -XXX,XX +XXX,XX @@ void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
84
}
85
}
86
87
-void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
88
+void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
89
{
90
- tcg_debug_assert(arg2 < 64);
91
+ tcg_debug_assert(arg2 >= 0 && arg2 < 64);
92
/* some cases can be optimized here */
93
if (arg2 == 0) {
94
tcg_gen_mov_i64(ret, arg1);
95
--
30
--
96
2.20.1
31
2.43.0
97
98
diff view generated by jsdifflib
1
We have this same parameter for GVecGen2i, GVecGen3,
1
This allows targets to declare that the helper requires a
2
and GVecGen3i. This will make some SVE2 insns easier
2
float_status pointer instead of a generic void pointer.
3
to parameterize.
4
3
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
include/tcg/tcg-op-gvec.h | 2 ++
7
include/exec/helper-head.h.inc | 3 +++
9
tcg/tcg-op-gvec.c | 45 ++++++++++++++++++++++++++++-----------
8
1 file changed, 3 insertions(+)
10
2 files changed, 34 insertions(+), 13 deletions(-)
11
9
12
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
10
diff --git a/include/exec/helper-head.h.inc b/include/exec/helper-head.h.inc
13
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
14
--- a/include/tcg/tcg-op-gvec.h
12
--- a/include/exec/helper-head.h.inc
15
+++ b/include/tcg/tcg-op-gvec.h
13
+++ b/include/exec/helper-head.h.inc
16
@@ -XXX,XX +XXX,XX @@ typedef struct {
14
@@ -XXX,XX +XXX,XX @@
17
uint8_t vece;
15
#define dh_alias_ptr ptr
18
/* Prefer i64 to v64. */
16
#define dh_alias_cptr ptr
19
bool prefer_i64;
17
#define dh_alias_env ptr
20
+ /* Load dest as a 2nd source operand. */
18
+#define dh_alias_fpst ptr
21
+ bool load_dest;
19
#define dh_alias_void void
22
} GVecGen2;
20
#define dh_alias_noreturn noreturn
23
21
#define dh_alias(t) glue(dh_alias_, t)
24
typedef struct {
22
@@ -XXX,XX +XXX,XX @@
25
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
23
#define dh_ctype_ptr void *
26
index XXXXXXX..XXXXXXX 100644
24
#define dh_ctype_cptr const void *
27
--- a/tcg/tcg-op-gvec.c
25
#define dh_ctype_env CPUArchState *
28
+++ b/tcg/tcg-op-gvec.c
26
+#define dh_ctype_fpst float_status *
29
@@ -XXX,XX +XXX,XX @@ static void expand_clr(uint32_t dofs, uint32_t maxsz)
27
#define dh_ctype_void void
30
28
#define dh_ctype_noreturn G_NORETURN void
31
/* Expand OPSZ bytes worth of two-operand operations using i32 elements. */
29
#define dh_ctype(t) dh_ctype_##t
32
static void expand_2_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
30
@@ -XXX,XX +XXX,XX @@
33
- void (*fni)(TCGv_i32, TCGv_i32))
31
#define dh_typecode_f64 dh_typecode_i64
34
+ bool load_dest, void (*fni)(TCGv_i32, TCGv_i32))
32
#define dh_typecode_cptr dh_typecode_ptr
35
{
33
#define dh_typecode_env dh_typecode_ptr
36
TCGv_i32 t0 = tcg_temp_new_i32();
34
+#define dh_typecode_fpst dh_typecode_ptr
37
+ TCGv_i32 t1 = tcg_temp_new_i32();
35
#define dh_typecode(t) dh_typecode_##t
38
uint32_t i;
36
39
37
#define dh_callflag_i32 0
40
for (i = 0; i < oprsz; i += 4) {
41
tcg_gen_ld_i32(t0, cpu_env, aofs + i);
42
- fni(t0, t0);
43
- tcg_gen_st_i32(t0, cpu_env, dofs + i);
44
+ if (load_dest) {
45
+ tcg_gen_ld_i32(t1, cpu_env, dofs + i);
46
+ }
47
+ fni(t1, t0);
48
+ tcg_gen_st_i32(t1, cpu_env, dofs + i);
49
}
50
tcg_temp_free_i32(t0);
51
+ tcg_temp_free_i32(t1);
52
}
53
54
static void expand_2i_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
55
@@ -XXX,XX +XXX,XX @@ static void expand_4_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
56
57
/* Expand OPSZ bytes worth of two-operand operations using i64 elements. */
58
static void expand_2_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
59
- void (*fni)(TCGv_i64, TCGv_i64))
60
+ bool load_dest, void (*fni)(TCGv_i64, TCGv_i64))
61
{
62
TCGv_i64 t0 = tcg_temp_new_i64();
63
+ TCGv_i64 t1 = tcg_temp_new_i64();
64
uint32_t i;
65
66
for (i = 0; i < oprsz; i += 8) {
67
tcg_gen_ld_i64(t0, cpu_env, aofs + i);
68
- fni(t0, t0);
69
- tcg_gen_st_i64(t0, cpu_env, dofs + i);
70
+ if (load_dest) {
71
+ tcg_gen_ld_i64(t1, cpu_env, dofs + i);
72
+ }
73
+ fni(t1, t0);
74
+ tcg_gen_st_i64(t1, cpu_env, dofs + i);
75
}
76
tcg_temp_free_i64(t0);
77
+ tcg_temp_free_i64(t1);
78
}
79
80
static void expand_2i_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
81
@@ -XXX,XX +XXX,XX @@ static void expand_4_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
82
/* Expand OPSZ bytes worth of two-operand operations using host vectors. */
83
static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
84
uint32_t oprsz, uint32_t tysz, TCGType type,
85
+ bool load_dest,
86
void (*fni)(unsigned, TCGv_vec, TCGv_vec))
87
{
88
TCGv_vec t0 = tcg_temp_new_vec(type);
89
+ TCGv_vec t1 = tcg_temp_new_vec(type);
90
uint32_t i;
91
92
for (i = 0; i < oprsz; i += tysz) {
93
tcg_gen_ld_vec(t0, cpu_env, aofs + i);
94
- fni(vece, t0, t0);
95
- tcg_gen_st_vec(t0, cpu_env, dofs + i);
96
+ if (load_dest) {
97
+ tcg_gen_ld_vec(t1, cpu_env, dofs + i);
98
+ }
99
+ fni(vece, t1, t0);
100
+ tcg_gen_st_vec(t1, cpu_env, dofs + i);
101
}
102
tcg_temp_free_vec(t0);
103
+ tcg_temp_free_vec(t1);
104
}
105
106
/* Expand OPSZ bytes worth of two-vector operands and an immediate operand
107
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
108
* that e.g. size == 80 would be expanded with 2x32 + 1x16.
109
*/
110
some = QEMU_ALIGN_DOWN(oprsz, 32);
111
- expand_2_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, g->fniv);
112
+ expand_2_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256,
113
+ g->load_dest, g->fniv);
114
if (some == oprsz) {
115
break;
116
}
117
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
118
maxsz -= some;
119
/* fallthru */
120
case TCG_TYPE_V128:
121
- expand_2_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, g->fniv);
122
+ expand_2_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128,
123
+ g->load_dest, g->fniv);
124
break;
125
case TCG_TYPE_V64:
126
- expand_2_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, g->fniv);
127
+ expand_2_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64,
128
+ g->load_dest, g->fniv);
129
break;
130
131
case 0:
132
if (g->fni8 && check_size_impl(oprsz, 8)) {
133
- expand_2_i64(dofs, aofs, oprsz, g->fni8);
134
+ expand_2_i64(dofs, aofs, oprsz, g->load_dest, g->fni8);
135
} else if (g->fni4 && check_size_impl(oprsz, 4)) {
136
- expand_2_i32(dofs, aofs, oprsz, g->fni4);
137
+ expand_2_i32(dofs, aofs, oprsz, g->load_dest, g->fni4);
138
} else {
139
assert(g->fno != NULL);
140
tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, g->data, g->fno);
141
--
38
--
142
2.20.1
39
2.43.0
143
40
144
41
diff view generated by jsdifflib
1
In a few cases, we're able to remove some manual replication.
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Rather than manually copying each register, use
4
the libc memcpy(), which is well optimized nowadays.
5
6
Suggested-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
7
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Message-ID: <20241205205418.67613-1-philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
12
---
6
target/arm/translate-a64.c | 10 +++++-----
13
target/sparc/win_helper.c | 26 ++++++++------------------
7
target/arm/translate-sve.c | 12 +++++-------
14
1 file changed, 8 insertions(+), 18 deletions(-)
8
target/arm/translate.c | 9 ++++++---
9
3 files changed, 16 insertions(+), 15 deletions(-)
10
15
11
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
16
diff --git a/target/sparc/win_helper.c b/target/sparc/win_helper.c
12
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
13
--- a/target/arm/translate-a64.c
18
--- a/target/sparc/win_helper.c
14
+++ b/target/arm/translate-a64.c
19
+++ b/target/sparc/win_helper.c
15
@@ -XXX,XX +XXX,XX @@ static void clear_vec_high(DisasContext *s, bool is_q, int rd)
20
@@ -XXX,XX +XXX,XX @@
16
tcg_temp_free_i64(tcg_zero);
21
#include "exec/helper-proto.h"
22
#include "trace.h"
23
24
-static inline void memcpy32(target_ulong *dst, const target_ulong *src)
25
-{
26
- dst[0] = src[0];
27
- dst[1] = src[1];
28
- dst[2] = src[2];
29
- dst[3] = src[3];
30
- dst[4] = src[4];
31
- dst[5] = src[5];
32
- dst[6] = src[6];
33
- dst[7] = src[7];
34
-}
35
-
36
void cpu_set_cwp(CPUSPARCState *env, int new_cwp)
37
{
38
/* put the modified wrap registers at their proper location */
39
if (env->cwp == env->nwindows - 1) {
40
- memcpy32(env->regbase, env->regbase + env->nwindows * 16);
41
+ memcpy(env->regbase, env->regbase + env->nwindows * 16,
42
+ sizeof(env->gregs));
17
}
43
}
18
if (vsz > 16) {
44
env->cwp = new_cwp;
19
- tcg_gen_gvec_dup8i(ofs + 16, vsz - 16, vsz - 16, 0);
45
20
+ tcg_gen_gvec_dup_imm(MO_64, ofs + 16, vsz - 16, vsz - 16, 0);
46
/* put the wrap registers at their temporary location */
47
if (new_cwp == env->nwindows - 1) {
48
- memcpy32(env->regbase + env->nwindows * 16, env->regbase);
49
+ memcpy(env->regbase + env->nwindows * 16, env->regbase,
50
+ sizeof(env->gregs));
51
}
52
env->regwptr = env->regbase + (new_cwp * 16);
53
}
54
@@ -XXX,XX +XXX,XX @@ void cpu_gl_switch_gregs(CPUSPARCState *env, uint32_t new_gl)
55
dst = get_gl_gregset(env, env->gl);
56
57
if (src != dst) {
58
- memcpy32(dst, env->gregs);
59
- memcpy32(env->gregs, src);
60
+ memcpy(dst, env->gregs, sizeof(env->gregs));
61
+ memcpy(env->gregs, src, sizeof(env->gregs));
21
}
62
}
22
}
63
}
23
64
24
@@ -XXX,XX +XXX,XX @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
65
@@ -XXX,XX +XXX,XX @@ void cpu_change_pstate(CPUSPARCState *env, uint32_t new_pstate)
25
66
/* Switch global register bank */
26
if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
67
src = get_gregset(env, new_pstate_regs);
27
/* MOVI or MVNI, with MVNI negation handled above. */
68
dst = get_gregset(env, pstate_regs);
28
- tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), is_q ? 16 : 8,
69
- memcpy32(dst, env->gregs);
29
- vec_full_reg_size(s), imm);
70
- memcpy32(env->gregs, src);
30
+ tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
71
+ memcpy(dst, env->gregs, sizeof(env->gregs));
31
+ vec_full_reg_size(s), imm);
72
+ memcpy(env->gregs, src, sizeof(env->gregs));
32
} else {
73
} else {
33
/* ORR or BIC, with BIC negation to AND handled above. */
74
trace_win_helper_no_switch_pstate(new_pstate_regs);
34
if (is_neg) {
35
@@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
36
if (is_u) {
37
if (shift == 8 << size) {
38
/* Shift count the same size as element size produces zero. */
39
- tcg_gen_gvec_dup8i(vec_full_reg_offset(s, rd),
40
- is_q ? 16 : 8, vec_full_reg_size(s), 0);
41
+ tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
42
+ is_q ? 16 : 8, vec_full_reg_size(s), 0);
43
} else {
44
gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size);
45
}
46
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/target/arm/translate-sve.c
49
+++ b/target/arm/translate-sve.c
50
@@ -XXX,XX +XXX,XX @@ static bool do_mov_z(DisasContext *s, int rd, int rn)
51
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
52
{
53
unsigned vsz = vec_full_reg_size(s);
54
- tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
55
+ tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
56
}
57
58
/* Invoke a vector expander on two Pregs. */
59
@@ -XXX,XX +XXX,XX @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
60
unsigned oprsz = size_for_gvec(setsz / 8);
61
62
if (oprsz * 8 == setsz) {
63
- tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
64
+ tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
65
goto done;
66
}
67
}
75
}
68
@@ -XXX,XX +XXX,XX @@ static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
69
unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
70
tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
71
} else {
72
- tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
73
+ tcg_gen_gvec_dup_imm(esz, dofs, vsz, vsz, 0);
74
}
75
}
76
return true;
77
@@ -XXX,XX +XXX,XX @@ static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
78
79
/* Decode the VFP immediate. */
80
imm = vfp_expand_imm(a->esz, a->imm);
81
- imm = dup_const(a->esz, imm);
82
-
83
- tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
84
+ tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
85
}
86
return true;
87
}
88
@@ -XXX,XX +XXX,XX @@ static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
89
unsigned vsz = vec_full_reg_size(s);
90
int dofs = vec_full_reg_offset(s, a->rd);
91
92
- tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
93
+ tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
94
}
95
return true;
96
}
97
diff --git a/target/arm/translate.c b/target/arm/translate.c
98
index XXXXXXX..XXXXXXX 100644
99
--- a/target/arm/translate.c
100
+++ b/target/arm/translate.c
101
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
102
MIN(shift, (8 << size) - 1),
103
vec_size, vec_size);
104
} else if (shift >= 8 << size) {
105
- tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
106
+ tcg_gen_gvec_dup_imm(MO_8, rd_ofs, vec_size,
107
+ vec_size, 0);
108
} else {
109
tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
110
vec_size, vec_size);
111
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
112
* architecturally valid and results in zero.
113
*/
114
if (shift >= 8 << size) {
115
- tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
116
+ tcg_gen_gvec_dup_imm(size, rd_ofs,
117
+ vec_size, vec_size, 0);
118
} else {
119
tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
120
vec_size, vec_size);
121
@@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
122
}
123
tcg_temp_free_i64(t64);
124
} else {
125
- tcg_gen_gvec_dup32i(reg_ofs, vec_size, vec_size, imm);
126
+ tcg_gen_gvec_dup_imm(MO_32, reg_ofs, vec_size,
127
+ vec_size, imm);
128
}
129
}
130
}
131
--
76
--
132
2.20.1
77
2.43.0
133
78
134
79
diff view generated by jsdifflib
Deleted patch
1
Replace the outgoing interface.
2
1
3
Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/tcg-op-gvec.c | 8 ++++----
8
1 file changed, 4 insertions(+), 4 deletions(-)
9
10
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/tcg-op-gvec.c
13
+++ b/tcg/tcg-op-gvec.c
14
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_xor(unsigned vece, uint32_t dofs, uint32_t aofs,
15
};
16
17
if (aofs == bofs) {
18
- tcg_gen_gvec_dup8i(dofs, oprsz, maxsz, 0);
19
+ tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, 0);
20
} else {
21
tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
22
}
23
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_andc(unsigned vece, uint32_t dofs, uint32_t aofs,
24
};
25
26
if (aofs == bofs) {
27
- tcg_gen_gvec_dup8i(dofs, oprsz, maxsz, 0);
28
+ tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, 0);
29
} else {
30
tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
31
}
32
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_orc(unsigned vece, uint32_t dofs, uint32_t aofs,
33
};
34
35
if (aofs == bofs) {
36
- tcg_gen_gvec_dup8i(dofs, oprsz, maxsz, -1);
37
+ tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, -1);
38
} else {
39
tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
40
}
41
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_eqv(unsigned vece, uint32_t dofs, uint32_t aofs,
42
};
43
44
if (aofs == bofs) {
45
- tcg_gen_gvec_dup8i(dofs, oprsz, maxsz, -1);
46
+ tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, -1);
47
} else {
48
tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
49
}
50
--
51
2.20.1
52
53
diff view generated by jsdifflib
Deleted patch
1
These interfaces are now unused.
2
1
3
Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
4
Reviewed-by: David Hildenbrand <david@redhat.com>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
include/tcg/tcg-op-gvec.h | 5 -----
9
tcg/tcg-op-gvec.c | 28 ----------------------------
10
2 files changed, 33 deletions(-)
11
12
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/include/tcg/tcg-op-gvec.h
15
+++ b/include/tcg/tcg-op-gvec.h
16
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t s,
17
void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t s,
18
uint32_t m, TCGv_i64);
19
20
-void tcg_gen_gvec_dup8i(uint32_t dofs, uint32_t s, uint32_t m, uint8_t x);
21
-void tcg_gen_gvec_dup16i(uint32_t dofs, uint32_t s, uint32_t m, uint16_t x);
22
-void tcg_gen_gvec_dup32i(uint32_t dofs, uint32_t s, uint32_t m, uint32_t x);
23
-void tcg_gen_gvec_dup64i(uint32_t dofs, uint32_t s, uint32_t m, uint64_t x);
24
-
25
void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs,
26
int64_t shift, uint32_t oprsz, uint32_t maxsz);
27
void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
28
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/tcg/tcg-op-gvec.c
31
+++ b/tcg/tcg-op-gvec.c
32
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
33
}
34
}
35
36
-void tcg_gen_gvec_dup64i(uint32_t dofs, uint32_t oprsz,
37
- uint32_t maxsz, uint64_t x)
38
-{
39
- check_size_align(oprsz, maxsz, dofs);
40
- do_dup(MO_64, dofs, oprsz, maxsz, NULL, NULL, x);
41
-}
42
-
43
-void tcg_gen_gvec_dup32i(uint32_t dofs, uint32_t oprsz,
44
- uint32_t maxsz, uint32_t x)
45
-{
46
- check_size_align(oprsz, maxsz, dofs);
47
- do_dup(MO_32, dofs, oprsz, maxsz, NULL, NULL, x);
48
-}
49
-
50
-void tcg_gen_gvec_dup16i(uint32_t dofs, uint32_t oprsz,
51
- uint32_t maxsz, uint16_t x)
52
-{
53
- check_size_align(oprsz, maxsz, dofs);
54
- do_dup(MO_16, dofs, oprsz, maxsz, NULL, NULL, x);
55
-}
56
-
57
-void tcg_gen_gvec_dup8i(uint32_t dofs, uint32_t oprsz,
58
- uint32_t maxsz, uint8_t x)
59
-{
60
- check_size_align(oprsz, maxsz, dofs);
61
- do_dup(MO_8, dofs, oprsz, maxsz, NULL, NULL, x);
62
-}
63
-
64
void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t oprsz,
65
uint32_t maxsz, uint64_t x)
66
{
67
--
68
2.20.1
69
70
diff view generated by jsdifflib
Deleted patch
1
For use when a target needs to pass a configure-specific
2
target_ulong value to duplicate.
3
1
4
Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
5
Reviewed-by: David Hildenbrand <david@redhat.com>
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
include/tcg/tcg-op-gvec.h | 6 ++++++
10
1 file changed, 6 insertions(+)
11
12
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/include/tcg/tcg-op-gvec.h
15
+++ b/include/tcg/tcg-op-gvec.h
16
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t s,
17
void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t s,
18
uint32_t m, TCGv_i64);
19
20
+#if TARGET_LONG_BITS == 64
21
+# define tcg_gen_gvec_dup_tl tcg_gen_gvec_dup_i64
22
+#else
23
+# define tcg_gen_gvec_dup_tl tcg_gen_gvec_dup_i32
24
+#endif
25
+
26
void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs,
27
int64_t shift, uint32_t oprsz, uint32_t maxsz);
28
void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
29
--
30
2.20.1
31
32
diff view generated by jsdifflib
Deleted patch
1
Better handling of non-power-of-2 tails as seen with Arm 8-byte
2
vector operations.
3
1
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/tcg-op-gvec.c | 82 ++++++++++++++++++++++++++++++++++++-----------
8
1 file changed, 63 insertions(+), 19 deletions(-)
9
10
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/tcg-op-gvec.c
13
+++ b/tcg/tcg-op-gvec.c
14
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
15
in units of LNSZ. This limits the expansion of inline code. */
16
static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz)
17
{
18
- if (oprsz % lnsz == 0) {
19
- uint32_t lnct = oprsz / lnsz;
20
- return lnct >= 1 && lnct <= MAX_UNROLL;
21
+ uint32_t q, r;
22
+
23
+ if (oprsz < lnsz) {
24
+ return false;
25
}
26
- return false;
27
+
28
+ q = oprsz / lnsz;
29
+ r = oprsz % lnsz;
30
+ tcg_debug_assert((r & 7) == 0);
31
+
32
+ if (lnsz < 16) {
33
+ /* For sizes below 16, accept no remainder. */
34
+ if (r != 0) {
35
+ return false;
36
+ }
37
+ } else {
38
+ /*
39
+ * Recall that ARM SVE allows vector sizes that are not a
40
+ * power of 2, but always a multiple of 16. The intent is
41
+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
42
+ * In addition, expand_clr needs to handle a multiple of 8.
43
+ * Thus we can handle the tail with one more operation per
44
+ * diminishing power of 2.
45
+ */
46
+ q += ctpop32(r);
47
+ }
48
+
49
+ return q <= MAX_UNROLL;
50
}
51
52
static void expand_clr(uint32_t dofs, uint32_t maxsz);
53
@@ -XXX,XX +XXX,XX @@ static void gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in)
54
static TCGType choose_vector_type(const TCGOpcode *list, unsigned vece,
55
uint32_t size, bool prefer_i64)
56
{
57
- if (TCG_TARGET_HAS_v256 && check_size_impl(size, 32)) {
58
- /*
59
- * Recall that ARM SVE allows vector sizes that are not a
60
- * power of 2, but always a multiple of 16. The intent is
61
- * that e.g. size == 80 would be expanded with 2x32 + 1x16.
62
- * It is hard to imagine a case in which v256 is supported
63
- * but v128 is not, but check anyway.
64
- */
65
- if (tcg_can_emit_vecop_list(list, TCG_TYPE_V256, vece)
66
- && (size % 32 == 0
67
- || tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece))) {
68
- return TCG_TYPE_V256;
69
- }
70
+ /*
71
+ * Recall that ARM SVE allows vector sizes that are not a
72
+ * power of 2, but always a multiple of 16. The intent is
73
+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
74
+ * It is hard to imagine a case in which v256 is supported
75
+ * but v128 is not, but check anyway.
76
+ * In addition, expand_clr needs to handle a multiple of 8.
77
+ */
78
+ if (TCG_TARGET_HAS_v256 &&
79
+ check_size_impl(size, 32) &&
80
+ tcg_can_emit_vecop_list(list, TCG_TYPE_V256, vece) &&
81
+ (!(size & 16) ||
82
+ (TCG_TARGET_HAS_v128 &&
83
+ tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece))) &&
84
+ (!(size & 8) ||
85
+ (TCG_TARGET_HAS_v64 &&
86
+ tcg_can_emit_vecop_list(list, TCG_TYPE_V64, vece)))) {
87
+ return TCG_TYPE_V256;
88
}
89
- if (TCG_TARGET_HAS_v128 && check_size_impl(size, 16)
90
- && tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece)) {
91
+ if (TCG_TARGET_HAS_v128 &&
92
+ check_size_impl(size, 16) &&
93
+ tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece) &&
94
+ (!(size & 8) ||
95
+ (TCG_TARGET_HAS_v64 &&
96
+ tcg_can_emit_vecop_list(list, TCG_TYPE_V64, vece)))) {
97
return TCG_TYPE_V128;
98
}
99
if (TCG_TARGET_HAS_v64 && !prefer_i64 && check_size_impl(size, 8)
100
@@ -XXX,XX +XXX,XX @@ static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz,
101
{
102
uint32_t i = 0;
103
104
+ tcg_debug_assert(oprsz >= 8);
105
+
106
+ /*
107
+ * This may be expand_clr for the tail of an operation, e.g.
108
+ * oprsz == 8 && maxsz == 64. The first 8 bytes of this store
109
+ * are misaligned wrt the maximum vector size, so do that first.
110
+ */
111
+ if (dofs & 8) {
112
+ tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64);
113
+ i += 8;
114
+ }
115
+
116
switch (type) {
117
case TCG_TYPE_V256:
118
/*
119
--
120
2.20.1
121
122
diff view generated by jsdifflib