1 | The following changes since commit a36d64f43325fa503075cc9408ddabb69b32f829: | 1 | Folding in a target/alpha patch since both queues |
---|---|---|---|
2 | are singletons this time. | ||
2 | 3 | ||
3 | Merge remote-tracking branch 'remotes/stsquad/tags/pull-testing-and-gdbstub-060520-1' into staging (2020-05-06 14:06:00 +0100) | 4 | |
5 | r~ | ||
6 | |||
7 | |||
8 | The following changes since commit 25d75c99b2e5941c67049ee776efdb226414f4c6: | ||
9 | |||
10 | Merge remote-tracking branch 'remotes/xtensa/tags/20210403-xtensa' into staging (2021-04-04 21:48:45 +0100) | ||
4 | 11 | ||
5 | are available in the Git repository at: | 12 | are available in the Git repository at: |
6 | 13 | ||
7 | https://github.com/rth7680/qemu.git tags/pull-tcg-20200506 | 14 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20210405 |
8 | 15 | ||
9 | for you to fetch changes up to 07dada0336a83002dfa8673a9220a88e13d9a45c: | 16 | for you to fetch changes up to ef951ee33fba780dd6c2b7f8ff25c84c3f87a6b8: |
10 | 17 | ||
11 | tcg: Fix integral argument type to tcg_gen_rot[rl]i_i{32,64} (2020-05-06 09:25:10 -0700) | 18 | target/alpha: fix icount handling for timer instructions (2021-04-05 07:32:56 -0700) |
12 | 19 | ||
13 | ---------------------------------------------------------------- | 20 | ---------------------------------------------------------------- |
14 | Add tcg_gen_gvec_dup_imm | 21 | tcg/mips tlb lookup fix |
15 | Misc tcg patches | 22 | target/alpha icount fix |
16 | 23 | ||
17 | ---------------------------------------------------------------- | 24 | ---------------------------------------------------------------- |
18 | Richard Henderson (10): | 25 | Kele Huang (1): |
19 | tcg: Add tcg_gen_gvec_dup_imm | 26 | tcg/mips: Fix SoftTLB comparison on mips backend |
20 | target/s390x: Use tcg_gen_gvec_dup_imm | ||
21 | target/ppc: Use tcg_gen_gvec_dup_imm | ||
22 | target/arm: Use tcg_gen_gvec_dup_imm | ||
23 | tcg: Use tcg_gen_gvec_dup_imm in logical simplifications | ||
24 | tcg: Remove tcg_gen_gvec_dup{8,16,32,64}i | ||
25 | tcg: Add tcg_gen_gvec_dup_tl | ||
26 | tcg: Improve vector tail clearing | ||
27 | tcg: Add load_dest parameter to GVecGen2 | ||
28 | tcg: Fix integral argument type to tcg_gen_rot[rl]i_i{32,64} | ||
29 | 27 | ||
30 | include/tcg/tcg-op-gvec.h | 13 ++- | 28 | Pavel Dovgalyuk (1): |
31 | include/tcg/tcg-op.h | 8 +- | 29 | target/alpha: fix icount handling for timer instructions |
32 | target/arm/translate-a64.c | 10 +-- | ||
33 | target/arm/translate-sve.c | 12 ++- | ||
34 | target/arm/translate.c | 9 +- | ||
35 | target/ppc/translate/vmx-impl.inc.c | 32 +++---- | ||
36 | target/ppc/translate/vsx-impl.inc.c | 2 +- | ||
37 | target/s390x/translate_vx.inc.c | 41 ++------- | ||
38 | tcg/tcg-op-gvec.c | 162 +++++++++++++++++++++++------------- | ||
39 | tcg/tcg-op.c | 16 ++-- | ||
40 | 10 files changed, 166 insertions(+), 139 deletions(-) | ||
41 | 30 | ||
31 | target/alpha/translate.c | 9 +++++++-- | ||
32 | tcg/mips/tcg-target.c.inc | 2 +- | ||
33 | 2 files changed, 8 insertions(+), 3 deletions(-) | ||
34 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Add a version of tcg_gen_dup_* that takes both immediate and | ||
2 | a vector element size operand. This will replace the set of | ||
3 | tcg_gen_gvec_dup{8,16,32,64}i functions that encode the element | ||
4 | size within the function name. | ||
5 | 1 | ||
6 | Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
7 | Reviewed-by: David Hildenbrand <david@redhat.com> | ||
8 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | ||
11 | include/tcg/tcg-op-gvec.h | 2 ++ | ||
12 | tcg/tcg-op-gvec.c | 7 +++++++ | ||
13 | 2 files changed, 9 insertions(+) | ||
14 | |||
15 | diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/include/tcg/tcg-op-gvec.h | ||
18 | +++ b/include/tcg/tcg-op-gvec.h | ||
19 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
20 | |||
21 | void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
22 | uint32_t s, uint32_t m); | ||
23 | +void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t s, | ||
24 | + uint32_t m, uint64_t imm); | ||
25 | void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t s, | ||
26 | uint32_t m, TCGv_i32); | ||
27 | void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t s, | ||
28 | diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/tcg/tcg-op-gvec.c | ||
31 | +++ b/tcg/tcg-op-gvec.c | ||
32 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup8i(uint32_t dofs, uint32_t oprsz, | ||
33 | do_dup(MO_8, dofs, oprsz, maxsz, NULL, NULL, x); | ||
34 | } | ||
35 | |||
36 | +void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t oprsz, | ||
37 | + uint32_t maxsz, uint64_t x) | ||
38 | +{ | ||
39 | + check_size_align(oprsz, maxsz, dofs); | ||
40 | + do_dup(vece, dofs, oprsz, maxsz, NULL, NULL, x); | ||
41 | +} | ||
42 | + | ||
43 | void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
44 | uint32_t oprsz, uint32_t maxsz) | ||
45 | { | ||
46 | -- | ||
47 | 2.20.1 | ||
48 | |||
49 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | The gen_gvec_dupi switch is unnecessary with the new function. | ||
2 | Replace it with a local gen_gvec_dup_imm that takes care of the | ||
3 | register to offset conversion and length arguments. | ||
4 | 1 | ||
5 | Drop zero_vec and use gen_gvec_dup_imm with 0. | ||
6 | |||
7 | Reviewed-by: David Hildenbrand <david@redhat.com> | ||
8 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | ||
11 | target/s390x/translate_vx.inc.c | 41 +++++++-------------------------- | ||
12 | 1 file changed, 8 insertions(+), 33 deletions(-) | ||
13 | |||
14 | diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/target/s390x/translate_vx.inc.c | ||
17 | +++ b/target/s390x/translate_vx.inc.c | ||
18 | @@ -XXX,XX +XXX,XX @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr, | ||
19 | #define gen_gvec_mov(v1, v2) \ | ||
20 | tcg_gen_gvec_mov(0, vec_full_reg_offset(v1), vec_full_reg_offset(v2), 16, \ | ||
21 | 16) | ||
22 | -#define gen_gvec_dup64i(v1, c) \ | ||
23 | - tcg_gen_gvec_dup64i(vec_full_reg_offset(v1), 16, 16, c) | ||
24 | +#define gen_gvec_dup_imm(es, v1, c) \ | ||
25 | + tcg_gen_gvec_dup_imm(es, vec_full_reg_offset(v1), 16, 16, c); | ||
26 | #define gen_gvec_fn_2(fn, es, v1, v2) \ | ||
27 | tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ | ||
28 | 16, 16) | ||
29 | @@ -XXX,XX +XXX,XX @@ static void gen_gvec128_4_i64(gen_gvec128_4_i64_fn fn, uint8_t d, uint8_t a, | ||
30 | tcg_temp_free_i64(cl); | ||
31 | } | ||
32 | |||
33 | -static void gen_gvec_dupi(uint8_t es, uint8_t reg, uint64_t c) | ||
34 | -{ | ||
35 | - switch (es) { | ||
36 | - case ES_8: | ||
37 | - tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, c); | ||
38 | - break; | ||
39 | - case ES_16: | ||
40 | - tcg_gen_gvec_dup16i(vec_full_reg_offset(reg), 16, 16, c); | ||
41 | - break; | ||
42 | - case ES_32: | ||
43 | - tcg_gen_gvec_dup32i(vec_full_reg_offset(reg), 16, 16, c); | ||
44 | - break; | ||
45 | - case ES_64: | ||
46 | - gen_gvec_dup64i(reg, c); | ||
47 | - break; | ||
48 | - default: | ||
49 | - g_assert_not_reached(); | ||
50 | - } | ||
51 | -} | ||
52 | - | ||
53 | -static void zero_vec(uint8_t reg) | ||
54 | -{ | ||
55 | - tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, 0); | ||
56 | -} | ||
57 | - | ||
58 | static void gen_addi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah, | ||
59 | uint64_t b) | ||
60 | { | ||
61 | @@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o) | ||
62 | * Masks for both 64 bit elements of the vector are the same. | ||
63 | * Trust tcg to produce a good constant loading. | ||
64 | */ | ||
65 | - gen_gvec_dup64i(get_field(s, v1), | ||
66 | - generate_byte_mask(i2 & 0xff)); | ||
67 | + gen_gvec_dup_imm(ES_64, get_field(s, v1), | ||
68 | + generate_byte_mask(i2 & 0xff)); | ||
69 | } else { | ||
70 | TCGv_i64 t = tcg_temp_new_i64(); | ||
71 | |||
72 | @@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vgm(DisasContext *s, DisasOps *o) | ||
73 | } | ||
74 | } | ||
75 | |||
76 | - gen_gvec_dupi(es, get_field(s, v1), mask); | ||
77 | + gen_gvec_dup_imm(es, get_field(s, v1), mask); | ||
78 | return DISAS_NEXT; | ||
79 | } | ||
80 | |||
81 | @@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vllez(DisasContext *s, DisasOps *o) | ||
82 | |||
83 | t = tcg_temp_new_i64(); | ||
84 | tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TE | es); | ||
85 | - zero_vec(get_field(s, v1)); | ||
86 | + gen_gvec_dup_imm(es, get_field(s, v1), 0); | ||
87 | write_vec_element_i64(t, get_field(s, v1), enr, es); | ||
88 | tcg_temp_free_i64(t); | ||
89 | return DISAS_NEXT; | ||
90 | @@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o) | ||
91 | return DISAS_NORETURN; | ||
92 | } | ||
93 | |||
94 | - gen_gvec_dupi(es, get_field(s, v1), data); | ||
95 | + gen_gvec_dup_imm(es, get_field(s, v1), data); | ||
96 | return DISAS_NEXT; | ||
97 | } | ||
98 | |||
99 | @@ -XXX,XX +XXX,XX @@ static DisasJumpType op_vcksm(DisasContext *s, DisasOps *o) | ||
100 | read_vec_element_i32(tmp, get_field(s, v2), i, ES_32); | ||
101 | tcg_gen_add2_i32(tmp, sum, sum, sum, tmp, tmp); | ||
102 | } | ||
103 | - zero_vec(get_field(s, v1)); | ||
104 | + gen_gvec_dup_imm(ES_32, get_field(s, v1), 0); | ||
105 | write_vec_element_i32(sum, get_field(s, v1), 1, ES_32); | ||
106 | |||
107 | tcg_temp_free_i32(tmp); | ||
108 | -- | ||
109 | 2.20.1 | ||
110 | |||
111 | diff view generated by jsdifflib |
1 | For the benefit of compatibility of function pointer types, | 1 | From: Kele Huang <kele.hwang@gmail.com> |
---|---|---|---|
2 | we have standardized on int32_t and int64_t as the integral | ||
3 | argument to tcg expanders. | ||
4 | 2 | ||
5 | We converted most of them in 474b2e8f0f7, but missed the rotates. | 3 | The addrl used to compare with SoftTLB entry should be sign-extended |
4 | in the common case, and it will cause constant failures in SoftTLB | ||
5 | comparisons for any addrl whose address is over 0x80000000 when | ||
6 | emulating a 32-bit guest on a 64-bit host. | ||
6 | 7 | ||
8 | This is an important performance bug fix. Spec2000 gzip rate increases | ||
9 | from ~45 to ~140 on Loongson 3A4000 (MIPS compatible platform). | ||
10 | |||
11 | Signed-off-by: Kele Huang <kele.hwang@gmail.com> | ||
7 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 12 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> |
8 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 13 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> |
14 | Message-Id: <20210401100457.191458-1-kele.hwang@gmail.com> | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 15 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
10 | --- | 16 | --- |
11 | include/tcg/tcg-op.h | 8 ++++---- | 17 | tcg/mips/tcg-target.c.inc | 2 +- |
12 | tcg/tcg-op.c | 16 ++++++++-------- | 18 | 1 file changed, 1 insertion(+), 1 deletion(-) |
13 | 2 files changed, 12 insertions(+), 12 deletions(-) | ||
14 | 19 | ||
15 | diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h | 20 | diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc |
16 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/include/tcg/tcg-op.h | 22 | --- a/tcg/mips/tcg-target.c.inc |
18 | +++ b/include/tcg/tcg-op.h | 23 | +++ b/tcg/mips/tcg-target.c.inc |
19 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2); | 24 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, |
20 | void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg); | 25 | load the tlb addend for the fast path. */ |
21 | void tcg_gen_ctpop_i32(TCGv_i32 a1, TCGv_i32 a2); | 26 | tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off); |
22 | void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); | ||
23 | -void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2); | ||
24 | +void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); | ||
25 | void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2); | ||
26 | -void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2); | ||
27 | +void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2); | ||
28 | void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, | ||
29 | unsigned int ofs, unsigned int len); | ||
30 | void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg, | ||
31 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2); | ||
32 | void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg); | ||
33 | void tcg_gen_ctpop_i64(TCGv_i64 a1, TCGv_i64 a2); | ||
34 | void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); | ||
35 | -void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2); | ||
36 | +void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); | ||
37 | void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2); | ||
38 | -void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2); | ||
39 | +void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2); | ||
40 | void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, | ||
41 | unsigned int ofs, unsigned int len); | ||
42 | void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg, | ||
43 | diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/tcg/tcg-op.c | ||
46 | +++ b/tcg/tcg-op.c | ||
47 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) | ||
48 | } | 27 | } |
49 | } | 28 | - tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl); |
50 | 29 | ||
51 | -void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2) | 30 | /* Zero extend a 32-bit guest address for a 64-bit host. */ |
52 | +void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) | 31 | if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { |
53 | { | 32 | tcg_out_ext32u(s, base, addrl); |
54 | - tcg_debug_assert(arg2 < 32); | 33 | addrl = base; |
55 | + tcg_debug_assert(arg2 >= 0 && arg2 < 32); | ||
56 | /* some cases can be optimized here */ | ||
57 | if (arg2 == 0) { | ||
58 | tcg_gen_mov_i32(ret, arg1); | ||
59 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) | ||
60 | } | 34 | } |
61 | } | 35 | + tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl); |
62 | 36 | ||
63 | -void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2) | 37 | label_ptr[0] = s->code_ptr; |
64 | +void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) | 38 | tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0); |
65 | { | ||
66 | - tcg_debug_assert(arg2 < 32); | ||
67 | + tcg_debug_assert(arg2 >= 0 && arg2 < 32); | ||
68 | /* some cases can be optimized here */ | ||
69 | if (arg2 == 0) { | ||
70 | tcg_gen_mov_i32(ret, arg1); | ||
71 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) | ||
72 | } | ||
73 | } | ||
74 | |||
75 | -void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2) | ||
76 | +void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) | ||
77 | { | ||
78 | - tcg_debug_assert(arg2 < 64); | ||
79 | + tcg_debug_assert(arg2 >= 0 && arg2 < 64); | ||
80 | /* some cases can be optimized here */ | ||
81 | if (arg2 == 0) { | ||
82 | tcg_gen_mov_i64(ret, arg1); | ||
83 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) | ||
84 | } | ||
85 | } | ||
86 | |||
87 | -void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2) | ||
88 | +void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2) | ||
89 | { | ||
90 | - tcg_debug_assert(arg2 < 64); | ||
91 | + tcg_debug_assert(arg2 >= 0 && arg2 < 64); | ||
92 | /* some cases can be optimized here */ | ||
93 | if (arg2 == 0) { | ||
94 | tcg_gen_mov_i64(ret, arg1); | ||
95 | -- | 39 | -- |
96 | 2.20.1 | 40 | 2.25.1 |
97 | 41 | ||
98 | 42 | diff view generated by jsdifflib |
1 | We can now unify the implementation of the 3 VSPLTI instructions. | 1 | From: Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> |
---|---|---|---|
2 | 2 | ||
3 | Acked-by: David Gibson <david@gibson.dropbear.id.au> | 3 | This patch handles icount mode for timer read/write instructions, |
4 | because it is required to call gen_io_start in such cases. | ||
5 | |||
6 | Signed-off-by: Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-Id: <161700373035.1135822.16451510827008616793.stgit@pasha-ThinkPad-X280> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | --- | 10 | --- |
6 | target/ppc/translate/vmx-impl.inc.c | 32 ++++++++++++++++------------- | 11 | target/alpha/translate.c | 9 +++++++-- |
7 | target/ppc/translate/vsx-impl.inc.c | 2 +- | 12 | 1 file changed, 7 insertions(+), 2 deletions(-) |
8 | 2 files changed, 19 insertions(+), 15 deletions(-) | ||
9 | 13 | ||
10 | diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c | 14 | diff --git a/target/alpha/translate.c b/target/alpha/translate.c |
11 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/target/ppc/translate/vmx-impl.inc.c | 16 | --- a/target/alpha/translate.c |
13 | +++ b/target/ppc/translate/vmx-impl.inc.c | 17 | +++ b/target/alpha/translate.c |
14 | @@ -XXX,XX +XXX,XX @@ GEN_VXRFORM_DUAL(vcmpbfp, PPC_ALTIVEC, PPC_NONE, \ | 18 | @@ -XXX,XX +XXX,XX @@ static DisasJumpType gen_mfpr(DisasContext *ctx, TCGv va, int regno) |
15 | GEN_VXRFORM_DUAL(vcmpgtfp, PPC_ALTIVEC, PPC_NONE, \ | 19 | case 249: /* VMTIME */ |
16 | vcmpgtud, PPC_NONE, PPC2_ALTIVEC_207) | 20 | helper = gen_helper_get_vmtime; |
17 | 21 | do_helper: | |
18 | -#define GEN_VXFORM_DUPI(name, tcg_op, opc2, opc3) \ | 22 | - if (icount_enabled()) { |
19 | -static void glue(gen_, name)(DisasContext *ctx) \ | 23 | + if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) { |
20 | - { \ | 24 | gen_io_start(); |
21 | - int simm; \ | 25 | helper(va); |
22 | - if (unlikely(!ctx->altivec_enabled)) { \ | 26 | return DISAS_PC_STALE; |
23 | - gen_exception(ctx, POWERPC_EXCP_VPU); \ | 27 | @@ -XXX,XX +XXX,XX @@ static DisasJumpType gen_mfpr(DisasContext *ctx, TCGv va, int regno) |
24 | - return; \ | 28 | static DisasJumpType gen_mtpr(DisasContext *ctx, TCGv vb, int regno) |
25 | - } \ | 29 | { |
26 | - simm = SIMM5(ctx->opcode); \ | 30 | int data; |
27 | - tcg_op(avr_full_offset(rD(ctx->opcode)), 16, 16, simm); \ | 31 | + DisasJumpType ret = DISAS_NEXT; |
28 | +static void gen_vsplti(DisasContext *ctx, int vece) | 32 | |
29 | +{ | 33 | switch (regno) { |
30 | + int simm; | 34 | case 255: |
31 | + | 35 | @@ -XXX,XX +XXX,XX @@ static DisasJumpType gen_mtpr(DisasContext *ctx, TCGv vb, int regno) |
32 | + if (unlikely(!ctx->altivec_enabled)) { | 36 | |
33 | + gen_exception(ctx, POWERPC_EXCP_VPU); | 37 | case 251: |
34 | + return; | 38 | /* ALARM */ |
39 | + if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) { | ||
40 | + gen_io_start(); | ||
41 | + ret = DISAS_PC_STALE; | ||
42 | + } | ||
43 | gen_helper_set_alarm(cpu_env, vb); | ||
44 | break; | ||
45 | |||
46 | @@ -XXX,XX +XXX,XX @@ static DisasJumpType gen_mtpr(DisasContext *ctx, TCGv vb, int regno) | ||
47 | break; | ||
35 | } | 48 | } |
36 | 49 | ||
37 | -GEN_VXFORM_DUPI(vspltisb, tcg_gen_gvec_dup8i, 6, 12); | 50 | - return DISAS_NEXT; |
38 | -GEN_VXFORM_DUPI(vspltish, tcg_gen_gvec_dup16i, 6, 13); | 51 | + return ret; |
39 | -GEN_VXFORM_DUPI(vspltisw, tcg_gen_gvec_dup32i, 6, 14); | ||
40 | + simm = SIMM5(ctx->opcode); | ||
41 | + tcg_gen_gvec_dup_imm(vece, avr_full_offset(rD(ctx->opcode)), 16, 16, simm); | ||
42 | +} | ||
43 | + | ||
44 | +#define GEN_VXFORM_VSPLTI(name, vece, opc2, opc3) \ | ||
45 | +static void glue(gen_, name)(DisasContext *ctx) { gen_vsplti(ctx, vece); } | ||
46 | + | ||
47 | +GEN_VXFORM_VSPLTI(vspltisb, MO_8, 6, 12); | ||
48 | +GEN_VXFORM_VSPLTI(vspltish, MO_16, 6, 13); | ||
49 | +GEN_VXFORM_VSPLTI(vspltisw, MO_32, 6, 14); | ||
50 | |||
51 | #define GEN_VXFORM_NOA(name, opc2, opc3) \ | ||
52 | static void glue(gen_, name)(DisasContext *ctx) \ | ||
53 | @@ -XXX,XX +XXX,XX @@ GEN_VXFORM_DUAL(vsldoi, PPC_ALTIVEC, PPC_NONE, | ||
54 | #undef GEN_VXRFORM_DUAL | ||
55 | #undef GEN_VXRFORM1 | ||
56 | #undef GEN_VXRFORM | ||
57 | -#undef GEN_VXFORM_DUPI | ||
58 | +#undef GEN_VXFORM_VSPLTI | ||
59 | #undef GEN_VXFORM_NOA | ||
60 | #undef GEN_VXFORM_UIMM | ||
61 | #undef GEN_VAFORM_PAIRED | ||
62 | diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/target/ppc/translate/vsx-impl.inc.c | ||
65 | +++ b/target/ppc/translate/vsx-impl.inc.c | ||
66 | @@ -XXX,XX +XXX,XX @@ static void gen_xxspltib(DisasContext *ctx) | ||
67 | return; | ||
68 | } | ||
69 | } | ||
70 | - tcg_gen_gvec_dup8i(vsr_full_offset(rt), 16, 16, uim8); | ||
71 | + tcg_gen_gvec_dup_imm(MO_8, vsr_full_offset(rt), 16, 16, uim8); | ||
72 | } | 52 | } |
73 | 53 | #endif /* !USER_ONLY*/ | |
74 | static void gen_xxsldwi(DisasContext *ctx) | 54 | |
75 | -- | 55 | -- |
76 | 2.20.1 | 56 | 2.25.1 |
77 | 57 | ||
78 | 58 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | In a few cases, we're able to remove some manual replication. | ||
2 | 1 | ||
3 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
5 | --- | ||
6 | target/arm/translate-a64.c | 10 +++++----- | ||
7 | target/arm/translate-sve.c | 12 +++++------- | ||
8 | target/arm/translate.c | 9 ++++++--- | ||
9 | 3 files changed, 16 insertions(+), 15 deletions(-) | ||
10 | |||
11 | diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/arm/translate-a64.c | ||
14 | +++ b/target/arm/translate-a64.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static void clear_vec_high(DisasContext *s, bool is_q, int rd) | ||
16 | tcg_temp_free_i64(tcg_zero); | ||
17 | } | ||
18 | if (vsz > 16) { | ||
19 | - tcg_gen_gvec_dup8i(ofs + 16, vsz - 16, vsz - 16, 0); | ||
20 | + tcg_gen_gvec_dup_imm(MO_64, ofs + 16, vsz - 16, vsz - 16, 0); | ||
21 | } | ||
22 | } | ||
23 | |||
24 | @@ -XXX,XX +XXX,XX @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) | ||
25 | |||
26 | if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) { | ||
27 | /* MOVI or MVNI, with MVNI negation handled above. */ | ||
28 | - tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), is_q ? 16 : 8, | ||
29 | - vec_full_reg_size(s), imm); | ||
30 | + tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8, | ||
31 | + vec_full_reg_size(s), imm); | ||
32 | } else { | ||
33 | /* ORR or BIC, with BIC negation to AND handled above. */ | ||
34 | if (is_neg) { | ||
35 | @@ -XXX,XX +XXX,XX @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, | ||
36 | if (is_u) { | ||
37 | if (shift == 8 << size) { | ||
38 | /* Shift count the same size as element size produces zero. */ | ||
39 | - tcg_gen_gvec_dup8i(vec_full_reg_offset(s, rd), | ||
40 | - is_q ? 16 : 8, vec_full_reg_size(s), 0); | ||
41 | + tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd), | ||
42 | + is_q ? 16 : 8, vec_full_reg_size(s), 0); | ||
43 | } else { | ||
44 | gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size); | ||
45 | } | ||
46 | diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/target/arm/translate-sve.c | ||
49 | +++ b/target/arm/translate-sve.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static bool do_mov_z(DisasContext *s, int rd, int rn) | ||
51 | static void do_dupi_z(DisasContext *s, int rd, uint64_t word) | ||
52 | { | ||
53 | unsigned vsz = vec_full_reg_size(s); | ||
54 | - tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word); | ||
55 | + tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word); | ||
56 | } | ||
57 | |||
58 | /* Invoke a vector expander on two Pregs. */ | ||
59 | @@ -XXX,XX +XXX,XX @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag) | ||
60 | unsigned oprsz = size_for_gvec(setsz / 8); | ||
61 | |||
62 | if (oprsz * 8 == setsz) { | ||
63 | - tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word); | ||
64 | + tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word); | ||
65 | goto done; | ||
66 | } | ||
67 | } | ||
68 | @@ -XXX,XX +XXX,XX @@ static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) | ||
69 | unsigned nofs = vec_reg_offset(s, a->rn, index, esz); | ||
70 | tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz); | ||
71 | } else { | ||
72 | - tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0); | ||
73 | + tcg_gen_gvec_dup_imm(esz, dofs, vsz, vsz, 0); | ||
74 | } | ||
75 | } | ||
76 | return true; | ||
77 | @@ -XXX,XX +XXX,XX @@ static bool trans_FDUP(DisasContext *s, arg_FDUP *a) | ||
78 | |||
79 | /* Decode the VFP immediate. */ | ||
80 | imm = vfp_expand_imm(a->esz, a->imm); | ||
81 | - imm = dup_const(a->esz, imm); | ||
82 | - | ||
83 | - tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm); | ||
84 | + tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm); | ||
85 | } | ||
86 | return true; | ||
87 | } | ||
88 | @@ -XXX,XX +XXX,XX @@ static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a) | ||
89 | unsigned vsz = vec_full_reg_size(s); | ||
90 | int dofs = vec_full_reg_offset(s, a->rd); | ||
91 | |||
92 | - tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm)); | ||
93 | + tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm); | ||
94 | } | ||
95 | return true; | ||
96 | } | ||
97 | diff --git a/target/arm/translate.c b/target/arm/translate.c | ||
98 | index XXXXXXX..XXXXXXX 100644 | ||
99 | --- a/target/arm/translate.c | ||
100 | +++ b/target/arm/translate.c | ||
101 | @@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) | ||
102 | MIN(shift, (8 << size) - 1), | ||
103 | vec_size, vec_size); | ||
104 | } else if (shift >= 8 << size) { | ||
105 | - tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0); | ||
106 | + tcg_gen_gvec_dup_imm(MO_8, rd_ofs, vec_size, | ||
107 | + vec_size, 0); | ||
108 | } else { | ||
109 | tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift, | ||
110 | vec_size, vec_size); | ||
111 | @@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) | ||
112 | * architecturally valid and results in zero. | ||
113 | */ | ||
114 | if (shift >= 8 << size) { | ||
115 | - tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0); | ||
116 | + tcg_gen_gvec_dup_imm(size, rd_ofs, | ||
117 | + vec_size, vec_size, 0); | ||
118 | } else { | ||
119 | tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift, | ||
120 | vec_size, vec_size); | ||
121 | @@ -XXX,XX +XXX,XX @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) | ||
122 | } | ||
123 | tcg_temp_free_i64(t64); | ||
124 | } else { | ||
125 | - tcg_gen_gvec_dup32i(reg_ofs, vec_size, vec_size, imm); | ||
126 | + tcg_gen_gvec_dup_imm(MO_32, reg_ofs, vec_size, | ||
127 | + vec_size, imm); | ||
128 | } | ||
129 | } | ||
130 | } | ||
131 | -- | ||
132 | 2.20.1 | ||
133 | |||
134 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Replace the outgoing interface. | ||
2 | 1 | ||
3 | Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
4 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | tcg/tcg-op-gvec.c | 8 ++++---- | ||
8 | 1 file changed, 4 insertions(+), 4 deletions(-) | ||
9 | |||
10 | diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/tcg/tcg-op-gvec.c | ||
13 | +++ b/tcg/tcg-op-gvec.c | ||
14 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_xor(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
15 | }; | ||
16 | |||
17 | if (aofs == bofs) { | ||
18 | - tcg_gen_gvec_dup8i(dofs, oprsz, maxsz, 0); | ||
19 | + tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, 0); | ||
20 | } else { | ||
21 | tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g); | ||
22 | } | ||
23 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_andc(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
24 | }; | ||
25 | |||
26 | if (aofs == bofs) { | ||
27 | - tcg_gen_gvec_dup8i(dofs, oprsz, maxsz, 0); | ||
28 | + tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, 0); | ||
29 | } else { | ||
30 | tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g); | ||
31 | } | ||
32 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_orc(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
33 | }; | ||
34 | |||
35 | if (aofs == bofs) { | ||
36 | - tcg_gen_gvec_dup8i(dofs, oprsz, maxsz, -1); | ||
37 | + tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, -1); | ||
38 | } else { | ||
39 | tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g); | ||
40 | } | ||
41 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_eqv(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
42 | }; | ||
43 | |||
44 | if (aofs == bofs) { | ||
45 | - tcg_gen_gvec_dup8i(dofs, oprsz, maxsz, -1); | ||
46 | + tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, -1); | ||
47 | } else { | ||
48 | tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g); | ||
49 | } | ||
50 | -- | ||
51 | 2.20.1 | ||
52 | |||
53 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | These interfaces are now unused. | ||
2 | 1 | ||
3 | Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
4 | Reviewed-by: David Hildenbrand <david@redhat.com> | ||
5 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | include/tcg/tcg-op-gvec.h | 5 ----- | ||
9 | tcg/tcg-op-gvec.c | 28 ---------------------------- | ||
10 | 2 files changed, 33 deletions(-) | ||
11 | |||
12 | diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/include/tcg/tcg-op-gvec.h | ||
15 | +++ b/include/tcg/tcg-op-gvec.h | ||
16 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t s, | ||
17 | void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t s, | ||
18 | uint32_t m, TCGv_i64); | ||
19 | |||
20 | -void tcg_gen_gvec_dup8i(uint32_t dofs, uint32_t s, uint32_t m, uint8_t x); | ||
21 | -void tcg_gen_gvec_dup16i(uint32_t dofs, uint32_t s, uint32_t m, uint16_t x); | ||
22 | -void tcg_gen_gvec_dup32i(uint32_t dofs, uint32_t s, uint32_t m, uint32_t x); | ||
23 | -void tcg_gen_gvec_dup64i(uint32_t dofs, uint32_t s, uint32_t m, uint64_t x); | ||
24 | - | ||
25 | void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
26 | int64_t shift, uint32_t oprsz, uint32_t maxsz); | ||
27 | void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
28 | diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/tcg/tcg-op-gvec.c | ||
31 | +++ b/tcg/tcg-op-gvec.c | ||
32 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
33 | } | ||
34 | } | ||
35 | |||
36 | -void tcg_gen_gvec_dup64i(uint32_t dofs, uint32_t oprsz, | ||
37 | - uint32_t maxsz, uint64_t x) | ||
38 | -{ | ||
39 | - check_size_align(oprsz, maxsz, dofs); | ||
40 | - do_dup(MO_64, dofs, oprsz, maxsz, NULL, NULL, x); | ||
41 | -} | ||
42 | - | ||
43 | -void tcg_gen_gvec_dup32i(uint32_t dofs, uint32_t oprsz, | ||
44 | - uint32_t maxsz, uint32_t x) | ||
45 | -{ | ||
46 | - check_size_align(oprsz, maxsz, dofs); | ||
47 | - do_dup(MO_32, dofs, oprsz, maxsz, NULL, NULL, x); | ||
48 | -} | ||
49 | - | ||
50 | -void tcg_gen_gvec_dup16i(uint32_t dofs, uint32_t oprsz, | ||
51 | - uint32_t maxsz, uint16_t x) | ||
52 | -{ | ||
53 | - check_size_align(oprsz, maxsz, dofs); | ||
54 | - do_dup(MO_16, dofs, oprsz, maxsz, NULL, NULL, x); | ||
55 | -} | ||
56 | - | ||
57 | -void tcg_gen_gvec_dup8i(uint32_t dofs, uint32_t oprsz, | ||
58 | - uint32_t maxsz, uint8_t x) | ||
59 | -{ | ||
60 | - check_size_align(oprsz, maxsz, dofs); | ||
61 | - do_dup(MO_8, dofs, oprsz, maxsz, NULL, NULL, x); | ||
62 | -} | ||
63 | - | ||
64 | void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t oprsz, | ||
65 | uint32_t maxsz, uint64_t x) | ||
66 | { | ||
67 | -- | ||
68 | 2.20.1 | ||
69 | |||
70 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | For use when a target needs to pass a configuration-specific | ||
2 | target_ulong value to be duplicated. | ||
3 | 1 | ||
4 | Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com> | ||
5 | Reviewed-by: David Hildenbrand <david@redhat.com> | ||
6 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | --- | ||
9 | include/tcg/tcg-op-gvec.h | 6 ++++++ | ||
10 | 1 file changed, 6 insertions(+) | ||
11 | |||
12 | diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/include/tcg/tcg-op-gvec.h | ||
15 | +++ b/include/tcg/tcg-op-gvec.h | ||
16 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t s, | ||
17 | void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t s, | ||
18 | uint32_t m, TCGv_i64); | ||
19 | |||
20 | +#if TARGET_LONG_BITS == 64 | ||
21 | +# define tcg_gen_gvec_dup_tl tcg_gen_gvec_dup_i64 | ||
22 | +#else | ||
23 | +# define tcg_gen_gvec_dup_tl tcg_gen_gvec_dup_i32 | ||
24 | +#endif | ||
25 | + | ||
26 | void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
27 | int64_t shift, uint32_t oprsz, uint32_t maxsz); | ||
28 | void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
29 | -- | ||
30 | 2.20.1 | ||
31 | |||
32 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Improve handling of non-power-of-2 tails, as seen with Arm 8-byte | ||
2 | vector operations. | ||
3 | 1 | ||
4 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | tcg/tcg-op-gvec.c | 82 ++++++++++++++++++++++++++++++++++++----------- | ||
8 | 1 file changed, 63 insertions(+), 19 deletions(-) | ||
9 | |||
10 | diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/tcg/tcg-op-gvec.c | ||
13 | +++ b/tcg/tcg-op-gvec.c | ||
14 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs, | ||
15 | in units of LNSZ. This limits the expansion of inline code. */ | ||
16 | static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz) | ||
17 | { | ||
18 | - if (oprsz % lnsz == 0) { | ||
19 | - uint32_t lnct = oprsz / lnsz; | ||
20 | - return lnct >= 1 && lnct <= MAX_UNROLL; | ||
21 | + uint32_t q, r; | ||
22 | + | ||
23 | + if (oprsz < lnsz) { | ||
24 | + return false; | ||
25 | } | ||
26 | - return false; | ||
27 | + | ||
28 | + q = oprsz / lnsz; | ||
29 | + r = oprsz % lnsz; | ||
30 | + tcg_debug_assert((r & 7) == 0); | ||
31 | + | ||
32 | + if (lnsz < 16) { | ||
33 | + /* For sizes below 16, accept no remainder. */ | ||
34 | + if (r != 0) { | ||
35 | + return false; | ||
36 | + } | ||
37 | + } else { | ||
38 | + /* | ||
39 | + * Recall that ARM SVE allows vector sizes that are not a | ||
40 | + * power of 2, but always a multiple of 16. The intent is | ||
41 | + * that e.g. size == 80 would be expanded with 2x32 + 1x16. | ||
42 | + * In addition, expand_clr needs to handle a multiple of 8. | ||
43 | + * Thus we can handle the tail with one more operation per | ||
44 | + * diminishing power of 2. | ||
45 | + */ | ||
46 | + q += ctpop32(r); | ||
47 | + } | ||
48 | + | ||
49 | + return q <= MAX_UNROLL; | ||
50 | } | ||
51 | |||
52 | static void expand_clr(uint32_t dofs, uint32_t maxsz); | ||
53 | @@ -XXX,XX +XXX,XX @@ static void gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in) | ||
54 | static TCGType choose_vector_type(const TCGOpcode *list, unsigned vece, | ||
55 | uint32_t size, bool prefer_i64) | ||
56 | { | ||
57 | - if (TCG_TARGET_HAS_v256 && check_size_impl(size, 32)) { | ||
58 | - /* | ||
59 | - * Recall that ARM SVE allows vector sizes that are not a | ||
60 | - * power of 2, but always a multiple of 16. The intent is | ||
61 | - * that e.g. size == 80 would be expanded with 2x32 + 1x16. | ||
62 | - * It is hard to imagine a case in which v256 is supported | ||
63 | - * but v128 is not, but check anyway. | ||
64 | - */ | ||
65 | - if (tcg_can_emit_vecop_list(list, TCG_TYPE_V256, vece) | ||
66 | - && (size % 32 == 0 | ||
67 | - || tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece))) { | ||
68 | - return TCG_TYPE_V256; | ||
69 | - } | ||
70 | + /* | ||
71 | + * Recall that ARM SVE allows vector sizes that are not a | ||
72 | + * power of 2, but always a multiple of 16. The intent is | ||
73 | + * that e.g. size == 80 would be expanded with 2x32 + 1x16. | ||
74 | + * It is hard to imagine a case in which v256 is supported | ||
75 | + * but v128 is not, but check anyway. | ||
76 | + * In addition, expand_clr needs to handle a multiple of 8. | ||
77 | + */ | ||
78 | + if (TCG_TARGET_HAS_v256 && | ||
79 | + check_size_impl(size, 32) && | ||
80 | + tcg_can_emit_vecop_list(list, TCG_TYPE_V256, vece) && | ||
81 | + (!(size & 16) || | ||
82 | + (TCG_TARGET_HAS_v128 && | ||
83 | + tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece))) && | ||
84 | + (!(size & 8) || | ||
85 | + (TCG_TARGET_HAS_v64 && | ||
86 | + tcg_can_emit_vecop_list(list, TCG_TYPE_V64, vece)))) { | ||
87 | + return TCG_TYPE_V256; | ||
88 | } | ||
89 | - if (TCG_TARGET_HAS_v128 && check_size_impl(size, 16) | ||
90 | - && tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece)) { | ||
91 | + if (TCG_TARGET_HAS_v128 && | ||
92 | + check_size_impl(size, 16) && | ||
93 | + tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece) && | ||
94 | + (!(size & 8) || | ||
95 | + (TCG_TARGET_HAS_v64 && | ||
96 | + tcg_can_emit_vecop_list(list, TCG_TYPE_V64, vece)))) { | ||
97 | return TCG_TYPE_V128; | ||
98 | } | ||
99 | if (TCG_TARGET_HAS_v64 && !prefer_i64 && check_size_impl(size, 8) | ||
100 | @@ -XXX,XX +XXX,XX @@ static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz, | ||
101 | { | ||
102 | uint32_t i = 0; | ||
103 | |||
104 | + tcg_debug_assert(oprsz >= 8); | ||
105 | + | ||
106 | + /* | ||
107 | + * This may be expand_clr for the tail of an operation, e.g. | ||
108 | + * oprsz == 8 && maxsz == 64. The first 8 bytes of this store | ||
109 | + * are misaligned wrt the maximum vector size, so do that first. | ||
110 | + */ | ||
111 | + if (dofs & 8) { | ||
112 | + tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64); | ||
113 | + i += 8; | ||
114 | + } | ||
115 | + | ||
116 | switch (type) { | ||
117 | case TCG_TYPE_V256: | ||
118 | /* | ||
119 | -- | ||
120 | 2.20.1 | ||
121 | |||
122 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | GVecGen2i, GVecGen3, and GVecGen3i already have this same | ||
2 | load_dest parameter. Adding it to GVecGen2 will make some | ||
3 | SVE2 instructions easier to parameterize. | ||
4 | 1 | ||
5 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | include/tcg/tcg-op-gvec.h | 2 ++ | ||
9 | tcg/tcg-op-gvec.c | 45 ++++++++++++++++++++++++++++----------- | ||
10 | 2 files changed, 34 insertions(+), 13 deletions(-) | ||
11 | |||
12 | diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/include/tcg/tcg-op-gvec.h | ||
15 | +++ b/include/tcg/tcg-op-gvec.h | ||
16 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
17 | uint8_t vece; | ||
18 | /* Prefer i64 to v64. */ | ||
19 | bool prefer_i64; | ||
20 | + /* Load dest as a 2nd source operand. */ | ||
21 | + bool load_dest; | ||
22 | } GVecGen2; | ||
23 | |||
24 | typedef struct { | ||
25 | diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/tcg/tcg-op-gvec.c | ||
28 | +++ b/tcg/tcg-op-gvec.c | ||
29 | @@ -XXX,XX +XXX,XX @@ static void expand_clr(uint32_t dofs, uint32_t maxsz) | ||
30 | |||
31 | /* Expand OPSZ bytes worth of two-operand operations using i32 elements. */ | ||
32 | static void expand_2_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz, | ||
33 | - void (*fni)(TCGv_i32, TCGv_i32)) | ||
34 | + bool load_dest, void (*fni)(TCGv_i32, TCGv_i32)) | ||
35 | { | ||
36 | TCGv_i32 t0 = tcg_temp_new_i32(); | ||
37 | + TCGv_i32 t1 = tcg_temp_new_i32(); | ||
38 | uint32_t i; | ||
39 | |||
40 | for (i = 0; i < oprsz; i += 4) { | ||
41 | tcg_gen_ld_i32(t0, cpu_env, aofs + i); | ||
42 | - fni(t0, t0); | ||
43 | - tcg_gen_st_i32(t0, cpu_env, dofs + i); | ||
44 | + if (load_dest) { | ||
45 | + tcg_gen_ld_i32(t1, cpu_env, dofs + i); | ||
46 | + } | ||
47 | + fni(t1, t0); | ||
48 | + tcg_gen_st_i32(t1, cpu_env, dofs + i); | ||
49 | } | ||
50 | tcg_temp_free_i32(t0); | ||
51 | + tcg_temp_free_i32(t1); | ||
52 | } | ||
53 | |||
54 | static void expand_2i_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz, | ||
55 | @@ -XXX,XX +XXX,XX @@ static void expand_4_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, | ||
56 | |||
57 | /* Expand OPSZ bytes worth of two-operand operations using i64 elements. */ | ||
58 | static void expand_2_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz, | ||
59 | - void (*fni)(TCGv_i64, TCGv_i64)) | ||
60 | + bool load_dest, void (*fni)(TCGv_i64, TCGv_i64)) | ||
61 | { | ||
62 | TCGv_i64 t0 = tcg_temp_new_i64(); | ||
63 | + TCGv_i64 t1 = tcg_temp_new_i64(); | ||
64 | uint32_t i; | ||
65 | |||
66 | for (i = 0; i < oprsz; i += 8) { | ||
67 | tcg_gen_ld_i64(t0, cpu_env, aofs + i); | ||
68 | - fni(t0, t0); | ||
69 | - tcg_gen_st_i64(t0, cpu_env, dofs + i); | ||
70 | + if (load_dest) { | ||
71 | + tcg_gen_ld_i64(t1, cpu_env, dofs + i); | ||
72 | + } | ||
73 | + fni(t1, t0); | ||
74 | + tcg_gen_st_i64(t1, cpu_env, dofs + i); | ||
75 | } | ||
76 | tcg_temp_free_i64(t0); | ||
77 | + tcg_temp_free_i64(t1); | ||
78 | } | ||
79 | |||
80 | static void expand_2i_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz, | ||
81 | @@ -XXX,XX +XXX,XX @@ static void expand_4_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs, | ||
82 | /* Expand OPSZ bytes worth of two-operand operations using host vectors. */ | ||
83 | static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
84 | uint32_t oprsz, uint32_t tysz, TCGType type, | ||
85 | + bool load_dest, | ||
86 | void (*fni)(unsigned, TCGv_vec, TCGv_vec)) | ||
87 | { | ||
88 | TCGv_vec t0 = tcg_temp_new_vec(type); | ||
89 | + TCGv_vec t1 = tcg_temp_new_vec(type); | ||
90 | uint32_t i; | ||
91 | |||
92 | for (i = 0; i < oprsz; i += tysz) { | ||
93 | tcg_gen_ld_vec(t0, cpu_env, aofs + i); | ||
94 | - fni(vece, t0, t0); | ||
95 | - tcg_gen_st_vec(t0, cpu_env, dofs + i); | ||
96 | + if (load_dest) { | ||
97 | + tcg_gen_ld_vec(t1, cpu_env, dofs + i); | ||
98 | + } | ||
99 | + fni(vece, t1, t0); | ||
100 | + tcg_gen_st_vec(t1, cpu_env, dofs + i); | ||
101 | } | ||
102 | tcg_temp_free_vec(t0); | ||
103 | + tcg_temp_free_vec(t1); | ||
104 | } | ||
105 | |||
106 | /* Expand OPSZ bytes worth of two-vector operands and an immediate operand | ||
107 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, | ||
108 | * that e.g. size == 80 would be expanded with 2x32 + 1x16. | ||
109 | */ | ||
110 | some = QEMU_ALIGN_DOWN(oprsz, 32); | ||
111 | - expand_2_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, g->fniv); | ||
112 | + expand_2_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, | ||
113 | + g->load_dest, g->fniv); | ||
114 | if (some == oprsz) { | ||
115 | break; | ||
116 | } | ||
117 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, | ||
118 | maxsz -= some; | ||
119 | /* fallthru */ | ||
120 | case TCG_TYPE_V128: | ||
121 | - expand_2_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, g->fniv); | ||
122 | + expand_2_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, | ||
123 | + g->load_dest, g->fniv); | ||
124 | break; | ||
125 | case TCG_TYPE_V64: | ||
126 | - expand_2_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, g->fniv); | ||
127 | + expand_2_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, | ||
128 | + g->load_dest, g->fniv); | ||
129 | break; | ||
130 | |||
131 | case 0: | ||
132 | if (g->fni8 && check_size_impl(oprsz, 8)) { | ||
133 | - expand_2_i64(dofs, aofs, oprsz, g->fni8); | ||
134 | + expand_2_i64(dofs, aofs, oprsz, g->load_dest, g->fni8); | ||
135 | } else if (g->fni4 && check_size_impl(oprsz, 4)) { | ||
136 | - expand_2_i32(dofs, aofs, oprsz, g->fni4); | ||
137 | + expand_2_i32(dofs, aofs, oprsz, g->load_dest, g->fni4); | ||
138 | } else { | ||
139 | assert(g->fno != NULL); | ||
140 | tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, g->data, g->fno); | ||
141 | -- | ||
142 | 2.20.1 | ||
143 | |||
144 | diff view generated by jsdifflib |