The following changes since commit 9e5319ca52a5b9e84d55ad9c36e2c0b317a122bb:

  Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging (2019-10-04 18:32:34 +0100)

are available in the Git repository at:

  https://github.com/rth7680/qemu.git tags/pull-tcg-20191013

for you to fetch changes up to d2f86bba6931388e275e8eb4ccd1dbcc7cae6328:

  cpus: kick all vCPUs when running thread=single (2019-10-07 14:08:58 -0400)

----------------------------------------------------------------
Host vector support for tcg/ppc.
Fix thread=single cpu kicking.

----------------------------------------------------------------
Alex Bennée (1):
      cpus: kick all vCPUs when running thread=single

Richard Henderson (22):
      tcg/ppc: Introduce Altivec registers
      tcg/ppc: Introduce macro VX4()
      tcg/ppc: Introduce macros VRT(), VRA(), VRB(), VRC()
      tcg/ppc: Create TCGPowerISA and have_isa
      tcg/ppc: Replace HAVE_ISA_2_06
      tcg/ppc: Replace HAVE_ISEL macro with a variable
      tcg/ppc: Enable tcg backend vector compilation
      tcg/ppc: Add support for load/store/logic/comparison
      tcg/ppc: Add support for vector maximum/minimum
      tcg/ppc: Add support for vector add/subtract
      tcg/ppc: Add support for vector saturated add/subtract
      tcg/ppc: Support vector shift by immediate
      tcg/ppc: Support vector multiply
      tcg/ppc: Support vector dup2
      tcg/ppc: Enable Altivec detection
      tcg/ppc: Update vector support for VSX
      tcg/ppc: Update vector support for v2.07 Altivec
      tcg/ppc: Update vector support for v2.07 VSX
      tcg/ppc: Update vector support for v2.07 FP
      tcg/ppc: Update vector support for v3.00 Altivec
      tcg/ppc: Update vector support for v3.00 load/store
      tcg/ppc: Update vector support for v3.00 dup/dupi

 tcg/ppc/tcg-target.h     |   51 ++-
 tcg/ppc/tcg-target.opc.h |   13 +
 cpus.c                   |   24 +-
 tcg/ppc/tcg-target.inc.c | 1118 ++++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 1119 insertions(+), 87 deletions(-)
 create mode 100644 tcg/ppc/tcg-target.opc.h
tcg/ppc: Introduce Altivec registers

Altivec supports 32 128-bit vector registers, whose names are
by convention v0 through v31.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
 tcg/ppc/tcg-target.h     | 11 ++++-
 tcg/ppc/tcg-target.inc.c | 88 +++++++++++++++++++++++++---------------
 2 files changed, 65 insertions(+), 34 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@
 # define TCG_TARGET_REG_BITS 32
 #endif
 
-#define TCG_TARGET_NB_REGS 32
+#define TCG_TARGET_NB_REGS 64
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
 
@@ -XXX,XX +XXX,XX @@ typedef enum {
     TCG_REG_R24, TCG_REG_R25, TCG_REG_R26, TCG_REG_R27,
     TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31,
 
+    TCG_REG_V0,  TCG_REG_V1,  TCG_REG_V2,  TCG_REG_V3,
+    TCG_REG_V4,  TCG_REG_V5,  TCG_REG_V6,  TCG_REG_V7,
+    TCG_REG_V8,  TCG_REG_V9,  TCG_REG_V10, TCG_REG_V11,
+    TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
+    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
+    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
+    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
+    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
+
     TCG_REG_CALL_STACK = TCG_REG_R1,
     TCG_AREG0 = TCG_REG_R27
 } TCGReg;
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@
 # define TCG_REG_TMP1 TCG_REG_R12
 #endif
 
+#define TCG_VEC_TMP1 TCG_REG_V0
+#define TCG_VEC_TMP2 TCG_REG_V1
+
 #define TCG_REG_TB  TCG_REG_R31
 #define USE_REG_TB  (TCG_TARGET_REG_BITS == 64)
 
@@ -XXX,XX +XXX,XX @@ bool have_isa_3_00;
 #endif
 
 #ifdef CONFIG_DEBUG_TCG
-static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
-    "r0",
-    "r1",
-    "r2",
-    "r3",
-    "r4",
-    "r5",
-    "r6",
-    "r7",
-    "r8",
-    "r9",
-    "r10",
-    "r11",
-    "r12",
-    "r13",
-    "r14",
-    "r15",
-    "r16",
-    "r17",
-    "r18",
-    "r19",
-    "r20",
-    "r21",
-    "r22",
-    "r23",
-    "r24",
-    "r25",
-    "r26",
-    "r27",
-    "r28",
-    "r29",
-    "r30",
-    "r31"
+static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
+    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
+    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
+    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
+    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
+    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
+    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
 };
 #endif
 
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_R5,
     TCG_REG_R4,
     TCG_REG_R3,
+
+    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
+    TCG_REG_V2,   /* call clobbered, vectors */
+    TCG_REG_V3,
+    TCG_REG_V4,
+    TCG_REG_V5,
+    TCG_REG_V6,
+    TCG_REG_V7,
+    TCG_REG_V8,
+    TCG_REG_V9,
+    TCG_REG_V10,
+    TCG_REG_V11,
+    TCG_REG_V12,
+    TCG_REG_V13,
+    TCG_REG_V14,
+    TCG_REG_V15,
+    TCG_REG_V16,
+    TCG_REG_V17,
+    TCG_REG_V18,
+    TCG_REG_V19,
 };
 
 static const int tcg_target_call_iarg_regs[] = {
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
 
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
+
     s->reserved_regs = 0;
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
 #endif
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
+    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
+    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
     if (USE_REG_TB) {
         tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
     }
-- 
2.17.1
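A side note on what doubling TCG_TARGET_NB_REGS to 64 means in practice:
a TCG register set is a 64-bit mask, so r0-r31 occupy bits 0-31 and the
new v0-v31 occupy bits 32-63. The sketch below is illustrative plain C,
not QEMU code; the vector mask value 0xffffffff00000000ull matches the
one used later in this series to describe the Altivec register file.

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t TCGRegSet;    /* bit N set <=> register N in the set */

    int main(void)
    {
        TCGRegSet gprs = 0x00000000ffffffffull;   /* r0-r31: bits 0-31  */
        TCGRegSet vecs = 0xffffffff00000000ull;   /* v0-v31: bits 32-63 */
        int tcg_reg_v5 = 32 + 5;   /* v5 follows the 32 GPRs in the enum */

        printf("v5 in vector set: %d\n", (int)((vecs >> tcg_reg_v5) & 1));
        printf("v5 in GPR set:    %d\n", (int)((gprs >> tcg_reg_v5) & 1));
        return 0;
    }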
1 | From: "Emilio G. Cota" <cota@braap.org> | 1 | Introduce macro VX4() used for encoding Altivec instructions. |
---|---|---|---|
2 | 2 | ||
3 | Which we forgot to do in bd224fce60 ("qht-bench: add -p flag | ||
4 | to precompute hash values", 2018-09-26). | ||
5 | |||
6 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
7 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
9 | --- | 5 | --- |
10 | tests/qht-bench.c | 1 + | 6 | tcg/ppc/tcg-target.inc.c | 1 + |
11 | 1 file changed, 1 insertion(+) | 7 | 1 file changed, 1 insertion(+) |
12 | 8 | ||
13 | diff --git a/tests/qht-bench.c b/tests/qht-bench.c | 9 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c |
14 | index XXXXXXX..XXXXXXX 100644 | 10 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/tests/qht-bench.c | 11 | --- a/tcg/ppc/tcg-target.inc.c |
16 | +++ b/tests/qht-bench.c | 12 | +++ b/tcg/ppc/tcg-target.inc.c |
17 | @@ -XXX,XX +XXX,XX @@ static const char commands_string[] = | 13 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, |
18 | " -n = number of threads\n" | 14 | #define XO31(opc) (OPCD(31)|((opc)<<1)) |
19 | "\n" | 15 | #define XO58(opc) (OPCD(58)|(opc)) |
20 | " -o = offset at which keys start\n" | 16 | #define XO62(opc) (OPCD(62)|(opc)) |
21 | + " -p = precompute hashes\n" | 17 | +#define VX4(opc) (OPCD(4)|(opc)) |
22 | "\n" | 18 | |
23 | " -g = set -s,-k,-K,-l,-r to the same value\n" | 19 | #define B OPCD( 18) |
24 | " -s = initial size hint\n" | 20 | #define BC OPCD( 16) |
25 | -- | 21 | -- |
26 | 2.17.2 | 22 | 2.17.1 |
27 | 23 | ||
28 | 24 | diff view generated by jsdifflib |
1 | From: "Emilio G. Cota" <cota@braap.org> | 1 | Introduce macros VRT(), VRA(), VRB(), VRC() used for encoding |
---|---|---|---|
2 | elements of Altivec instructions. | ||
2 | 3 | ||
3 | Change the order in which we extract a/b and c/d to | 4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | match the output of the upstream xxhash32. | 5 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> |
6 | --- | ||
7 | tcg/ppc/tcg-target.inc.c | 5 +++++ | ||
8 | 1 file changed, 5 insertions(+) | ||
5 | 9 | ||
6 | Tested with: | 10 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c |
7 | https://github.com/cota/xxhash/tree/qemu | ||
8 | |||
9 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
10 | Tested-by: Alex Bennée <alex.bennee@linaro.org> | ||
11 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
12 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | --- | ||
14 | include/qemu/xxhash.h | 8 ++++---- | ||
15 | 1 file changed, 4 insertions(+), 4 deletions(-) | ||
16 | |||
17 | diff --git a/include/qemu/xxhash.h b/include/qemu/xxhash.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/include/qemu/xxhash.h | 12 | --- a/tcg/ppc/tcg-target.inc.c |
20 | +++ b/include/qemu/xxhash.h | 13 | +++ b/tcg/ppc/tcg-target.inc.c |
21 | @@ -XXX,XX +XXX,XX @@ qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g) | 14 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, |
22 | uint32_t v2 = QEMU_XXHASH_SEED + PRIME32_2; | 15 | #define MB64(b) ((b)<<5) |
23 | uint32_t v3 = QEMU_XXHASH_SEED + 0; | 16 | #define FXM(b) (1 << (19 - (b))) |
24 | uint32_t v4 = QEMU_XXHASH_SEED - PRIME32_1; | 17 | |
25 | - uint32_t a = ab >> 32; | 18 | +#define VRT(r) (((r) & 31) << 21) |
26 | - uint32_t b = ab; | 19 | +#define VRA(r) (((r) & 31) << 16) |
27 | - uint32_t c = cd >> 32; | 20 | +#define VRB(r) (((r) & 31) << 11) |
28 | - uint32_t d = cd; | 21 | +#define VRC(r) (((r) & 31) << 6) |
29 | + uint32_t a = ab; | 22 | + |
30 | + uint32_t b = ab >> 32; | 23 | #define LK 1 |
31 | + uint32_t c = cd; | 24 | |
32 | + uint32_t d = cd >> 32; | 25 | #define TAB(t, a, b) (RT(t) | RA(a) | RB(b)) |
33 | uint32_t h32; | ||
34 | |||
35 | v1 += a * PRIME32_2; | ||
36 | -- | 26 | -- |
37 | 2.17.2 | 27 | 2.17.1 |
38 | 28 | ||
39 | 29 | diff view generated by jsdifflib |
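Taken together with VX4() from the previous patch, these operand macros
compose a complete VX-form instruction word: primary opcode 4 in the top
six bits, VRT/VRA/VRB in the three 5-bit register fields, and an 11-bit
extended opcode in the low bits. A standalone sketch follows; OPCD() is
reproduced from tcg/ppc, while the vadduwm extended opcode value (128)
is taken from the ISA tables rather than from these patches.

    #include <stdint.h>
    #include <stdio.h>

    #define OPCD(opc) ((uint32_t)(opc) << 26)
    #define VX4(opc)  (OPCD(4) | (opc))     /* primary opcode 4 + 11-bit XO */
    #define VRT(r)    (((uint32_t)(r) & 31) << 21)
    #define VRA(r)    (((uint32_t)(r) & 31) << 16)
    #define VRB(r)    (((uint32_t)(r) & 31) << 11)

    int main(void)
    {
        /* vadduwm v3, v4, v5 -- add the four 32-bit lanes of v4 and v5 */
        uint32_t insn = VX4(128) | VRT(3) | VRA(4) | VRB(5);
        printf("0x%08x\n", insn);           /* prints 0x10642880 */
        return 0;
    }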
tcg/ppc: Create TCGPowerISA and have_isa

Introduce an enum to hold base < 2.06 < 3.00. Use macros to
preserve the existing have_isa_2_06 and have_isa_3_00 predicates.

Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.h     | 12 ++++++++++--
 tcg/ppc/tcg-target.inc.c |  8 ++++----
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
     TCG_AREG0 = TCG_REG_R27
 } TCGReg;
 
-extern bool have_isa_2_06;
-extern bool have_isa_3_00;
+typedef enum {
+    tcg_isa_base,
+    tcg_isa_2_06,
+    tcg_isa_3_00,
+} TCGPowerISA;
+
+extern TCGPowerISA have_isa;
+
+#define have_isa_2_06  (have_isa >= tcg_isa_2_06)
+#define have_isa_3_00  (have_isa >= tcg_isa_3_00)
 
 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_ext8u_i32       0 /* andi */
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@
 
 static tcg_insn_unit *tb_ret_addr;
 
-bool have_isa_2_06;
-bool have_isa_3_00;
+TCGPowerISA have_isa;
 
 #define HAVE_ISA_2_06  have_isa_2_06
 #define HAVE_ISEL      have_isa_2_06
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
     unsigned long hwcap = qemu_getauxval(AT_HWCAP);
     unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
 
+    have_isa = tcg_isa_base;
     if (hwcap & PPC_FEATURE_ARCH_2_06) {
-        have_isa_2_06 = true;
+        have_isa = tcg_isa_2_06;
     }
 #ifdef PPC_FEATURE2_ARCH_3_00
     if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
-        have_isa_3_00 = true;
+        have_isa = tcg_isa_3_00;
     }
 #endif
 
-- 
2.17.1
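The trick here is that the ISA levels are cumulative, so keeping them in
an ordered enum lets one >= comparison stand in for a set of independent
booleans, and a host detected as 3.00 automatically satisfies every 2.06
check. A minimal self-contained sketch of the same pattern:

    #include <stdio.h>

    typedef enum {
        tcg_isa_base,
        tcg_isa_2_06,
        tcg_isa_3_00,
    } TCGPowerISA;

    static TCGPowerISA have_isa = tcg_isa_3_00;  /* as if detected at init */

    #define have_isa_2_06 (have_isa >= tcg_isa_2_06)
    #define have_isa_3_00 (have_isa >= tcg_isa_3_00)

    int main(void)
    {
        /* A 3.00 host automatically reports 2.06 support as well. */
        printf("2.06: %d, 3.00: %d\n", have_isa_2_06, have_isa_3_00);
        return 0;                            /* prints "2.06: 1, 3.00: 1" */
    }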
1 | From: "Emilio G. Cota" <cota@braap.org> | 1 | This is identical to have_isa_2_06, so replace it. |
---|---|---|---|
2 | 2 | ||
3 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 3 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> |
4 | Signed-off-by: Emilio G. Cota <cota@braap.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | --- | 5 | --- |
7 | include/exec/tb-hash.h | 2 +- | 6 | tcg/ppc/tcg-target.inc.c | 5 ++--- |
8 | include/{exec/tb-hash-xx.h => qemu/xxhash.h} | 6 +++--- | 7 | 1 file changed, 2 insertions(+), 3 deletions(-) |
9 | tests/qht-bench.c | 2 +- | ||
10 | util/qsp.c | 2 +- | ||
11 | 4 files changed, 6 insertions(+), 6 deletions(-) | ||
12 | rename include/{exec/tb-hash-xx.h => qemu/xxhash.h} (97%) | ||
13 | 8 | ||
14 | diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h | 9 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c |
15 | index XXXXXXX..XXXXXXX 100644 | 10 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/include/exec/tb-hash.h | 11 | --- a/tcg/ppc/tcg-target.inc.c |
17 | +++ b/include/exec/tb-hash.h | 12 | +++ b/tcg/ppc/tcg-target.inc.c |
18 | @@ -XXX,XX +XXX,XX @@ | 13 | @@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr; |
19 | #ifndef EXEC_TB_HASH_H | 14 | |
20 | #define EXEC_TB_HASH_H | 15 | TCGPowerISA have_isa; |
21 | 16 | ||
22 | -#include "exec/tb-hash-xx.h" | 17 | -#define HAVE_ISA_2_06 have_isa_2_06 |
23 | +#include "qemu/xxhash.h" | 18 | #define HAVE_ISEL have_isa_2_06 |
24 | 19 | ||
25 | #ifdef CONFIG_SOFTMMU | 20 | #ifndef CONFIG_SOFTMMU |
26 | 21 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) | |
27 | diff --git a/include/exec/tb-hash-xx.h b/include/qemu/xxhash.h | 22 | } |
28 | similarity index 97% | 23 | } else { |
29 | rename from include/exec/tb-hash-xx.h | 24 | uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)]; |
30 | rename to include/qemu/xxhash.h | 25 | - if (!HAVE_ISA_2_06 && insn == LDBRX) { |
31 | index XXXXXXX..XXXXXXX 100644 | 26 | + if (!have_isa_2_06 && insn == LDBRX) { |
32 | --- a/include/exec/tb-hash-xx.h | 27 | tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); |
33 | +++ b/include/qemu/xxhash.h | 28 | tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); |
34 | @@ -XXX,XX +XXX,XX @@ | 29 | tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0)); |
35 | * - xxHash source repository : https://github.com/Cyan4973/xxHash | 30 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) |
36 | */ | 31 | } |
37 | 32 | } else { | |
38 | -#ifndef EXEC_TB_HASH_XX_H | 33 | uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)]; |
39 | -#define EXEC_TB_HASH_XX_H | 34 | - if (!HAVE_ISA_2_06 && insn == STDBRX) { |
40 | +#ifndef QEMU_XXHASH_H | 35 | + if (!have_isa_2_06 && insn == STDBRX) { |
41 | +#define QEMU_XXHASH_H | 36 | tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); |
42 | 37 | tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4)); | |
43 | #include "qemu/bitops.h" | 38 | tcg_out_shri64(s, TCG_REG_R0, datalo, 32); |
44 | |||
45 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t qemu_xxhash6(uint64_t ab, uint64_t cd, uint32_t e, | ||
46 | return qemu_xxhash7(ab, cd, e, f, 0); | ||
47 | } | ||
48 | |||
49 | -#endif /* EXEC_TB_HASH_XX_H */ | ||
50 | +#endif /* QEMU_XXHASH_H */ | ||
51 | diff --git a/tests/qht-bench.c b/tests/qht-bench.c | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/tests/qht-bench.c | ||
54 | +++ b/tests/qht-bench.c | ||
55 | @@ -XXX,XX +XXX,XX @@ | ||
56 | #include "qemu/atomic.h" | ||
57 | #include "qemu/qht.h" | ||
58 | #include "qemu/rcu.h" | ||
59 | -#include "exec/tb-hash-xx.h" | ||
60 | +#include "qemu/xxhash.h" | ||
61 | |||
62 | struct thread_stats { | ||
63 | size_t rd; | ||
64 | diff --git a/util/qsp.c b/util/qsp.c | ||
65 | index XXXXXXX..XXXXXXX 100644 | ||
66 | --- a/util/qsp.c | ||
67 | +++ b/util/qsp.c | ||
68 | @@ -XXX,XX +XXX,XX @@ | ||
69 | #include "qemu/timer.h" | ||
70 | #include "qemu/qht.h" | ||
71 | #include "qemu/rcu.h" | ||
72 | -#include "exec/tb-hash-xx.h" | ||
73 | +#include "qemu/xxhash.h" | ||
74 | |||
75 | enum QSPType { | ||
76 | QSP_MUTEX, | ||
77 | -- | 39 | -- |
78 | 2.17.2 | 40 | 2.17.1 |
79 | 41 | ||
80 | 42 | diff view generated by jsdifflib |
tcg/ppc: Replace HAVE_ISEL macro with a variable

Previously we hard-coded knowledge that Power7 has ISEL, but it was an
optional instruction before that. Use the AT_HWCAP2 bit, when present,
to properly determine support.

Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.inc.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@
 static tcg_insn_unit *tb_ret_addr;
 
 TCGPowerISA have_isa;
-
-#define HAVE_ISEL      have_isa_2_06
+static bool have_isel;
 
 #ifndef CONFIG_SOFTMMU
 #define TCG_GUEST_BASE_REG 30
@@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
     /* If we have ISEL, we can implement everything with 3 or 4 insns.
        All other cases below are also at least 3 insns, so speed up the
        code generator by not considering them and always using ISEL. */
-    if (HAVE_ISEL) {
+    if (have_isel) {
         int isel, tab;
 
         tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
 
     tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
 
-    if (HAVE_ISEL) {
+    if (have_isel) {
         int isel = tcg_to_isel[cond];
 
         /* Swap the V operands if the operation indicates inversion. */
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
     } else {
         tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
         /* Note that the only other valid constant for a2 is 0. */
-        if (HAVE_ISEL) {
+        if (have_isel) {
             tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
             tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
         } else if (!const_a2 && a0 == a2) {
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
     }
 #endif
 
+#ifdef PPC_FEATURE2_HAS_ISEL
+    /* Prefer explicit instruction from the kernel. */
+    have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0;
+#else
+    /* Fall back to knowing Power7 (2.06) has ISEL. */
+    have_isel = have_isa_2_06;
+#endif
+
     tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
     tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
 
-- 
2.17.1
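Outside QEMU the same probe can be sketched with glibc's getauxval(),
which is where the hwcap2 word ultimately comes from (QEMU wraps it as
qemu_getauxval()). PPC_FEATURE2_HAS_ISEL is supplied by the Linux kernel
headers and may be absent on older toolchains, which is exactly why the
patch keeps the #ifdef fallback. Illustrative only, assuming a Linux/ppc
host:

    #include <stdbool.h>
    #include <sys/auxv.h>       /* getauxval, AT_HWCAP2 */

    bool detect_isel(bool have_isa_2_06)
    {
        unsigned long hwcap2 = getauxval(AT_HWCAP2);
    #ifdef PPC_FEATURE2_HAS_ISEL
        /* Prefer the explicit bit from the kernel. */
        return (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0;
    #else
        /* Fall back to knowing Power7 (ISA 2.06) has ISEL. */
        (void)hwcap2;
        return have_isa_2_06;
    #endif
    }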
tcg/ppc: Enable tcg backend vector compilation

Introduce all of the flags required to enable tcg backend vector support,
and a runtime flag to indicate the host supports Altivec instructions.

For now, do not actually set have_altivec to true, because we have not
yet added all of the code to actually generate all of the required insns.
However, we must define these flags in order to disable ifndefs that create
stub versions of the functions added here.

The change to tcg_out_movi works around a buglet in tcg.c wherein if we
do not define tcg_out_dupi_vec we get a declared but not defined Werror,
but if we only declare it we get a defined but not used Werror. We need
this change to tcg_out_movi eventually anyway, so it's no biggie.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com>
---
 tcg/ppc/tcg-target.h     | 25 ++++++++++++++
 tcg/ppc/tcg-target.opc.h |  5 ++++
 tcg/ppc/tcg-target.inc.c | 62 ++++++++++++++++++++++++++++++++++++--
 3 files changed, 89 insertions(+), 3 deletions(-)
 create mode 100644 tcg/ppc/tcg-target.opc.h

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 } TCGPowerISA;
 
 extern TCGPowerISA have_isa;
+extern bool have_altivec;
 
 #define have_isa_2_06  (have_isa >= tcg_isa_2_06)
 #define have_isa_3_00  (have_isa >= tcg_isa_3_00)
@@ -XXX,XX +XXX,XX @@ extern TCGPowerISA have_isa;
 #define TCG_TARGET_HAS_mulsh_i64        1
 #endif
 
+/*
+ * While technically Altivec could support V64, it has no 64-bit store
+ * instruction and substituting two 32-bit stores makes the generated
+ * code quite large.
+ */
+#define TCG_TARGET_HAS_v64              0
+#define TCG_TARGET_HAS_v128             have_altivec
+#define TCG_TARGET_HAS_v256             0
+
+#define TCG_TARGET_HAS_andc_vec         0
+#define TCG_TARGET_HAS_orc_vec          0
+#define TCG_TARGET_HAS_not_vec          0
+#define TCG_TARGET_HAS_neg_vec          0
+#define TCG_TARGET_HAS_abs_vec          0
+#define TCG_TARGET_HAS_shi_vec          0
+#define TCG_TARGET_HAS_shs_vec          0
+#define TCG_TARGET_HAS_shv_vec          0
+#define TCG_TARGET_HAS_cmp_vec          0
+#define TCG_TARGET_HAS_mul_vec          0
+#define TCG_TARGET_HAS_sat_vec          0
+#define TCG_TARGET_HAS_minmax_vec       0
+#define TCG_TARGET_HAS_bitsel_vec       0
+#define TCG_TARGET_HAS_cmpsel_vec       0
+
 void flush_icache_range(uintptr_t start, uintptr_t stop);
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
 
diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tcg/ppc/tcg-target.opc.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * Target-specific opcodes for host vector expansion.  These will be
+ * emitted by tcg_expand_vec_op.  For those familiar with GCC internals,
+ * consider these to be UNSPEC with names.
+ */
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr;
 
 TCGPowerISA have_isa;
 static bool have_isel;
+bool have_altivec;
 
 #ifndef CONFIG_SOFTMMU
 #define TCG_GUEST_BASE_REG 30
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
     }
 }
 
-static inline void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
-                                tcg_target_long arg)
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
+                             tcg_target_long val)
 {
-    tcg_out_movi_int(s, type, ret, arg, false);
+    g_assert_not_reached();
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
+                         tcg_target_long arg)
+{
+    switch (type) {
+    case TCG_TYPE_I32:
+    case TCG_TYPE_I64:
+        tcg_debug_assert(ret < TCG_REG_V0);
+        tcg_out_movi_int(s, type, ret, arg, false);
+        break;
+
+    case TCG_TYPE_V64:
+    case TCG_TYPE_V128:
+        tcg_debug_assert(ret >= TCG_REG_V0);
+        tcg_out_dupi_vec(s, type, ret, arg);
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
 }
 
 static bool mask_operand(uint32_t c, int *mb, int *me)
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     }
 }
 
+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
+{
+    g_assert_not_reached();
+}
+
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+                            TCGReg dst, TCGReg src)
+{
+    g_assert_not_reached();
+}
+
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+                             TCGReg out, TCGReg base, intptr_t offset)
+{
+    g_assert_not_reached();
+}
+
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
+                           unsigned vecl, unsigned vece,
+                           const TCGArg *args, const int *const_args)
+{
+    g_assert_not_reached();
+}
+
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
+                       TCGArg a0, ...)
+{
+    g_assert_not_reached();
+}
+
 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
 {
     static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
 
     tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
     tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
+    if (have_altivec) {
+        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
+        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
+    }
 
     tcg_target_call_clobber_regs = 0;
     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
279 | gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx); | ||
280 | + | ||
281 | + if (swap) { | ||
282 | + tcg_temp_free_i64(swap); | ||
283 | + } | ||
284 | } | ||
285 | |||
286 | static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, TCGMemOp opc) | ||
287 | -- | 172 | -- |
288 | 2.17.2 | 173 | 2.17.1 |
289 | 174 | ||
290 | 175 | diff view generated by jsdifflib |
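To make the bswap fallback above concrete: a minimal host-side sketch, not code from the patch, of what the i32 slow path amounts to for a big-endian signed 16-bit load (MO_BESW) on a little-endian host whose backend lacks TCG_TARGET_HAS_MEMORY_BSWAP. The sign is first stripped from the memop, the plain load is emitted, bswap16 then operates on the zero-extended result, and the sign extension is re-applied afterwards; the helper name here is hypothetical.

```c
#include <stdint.h>

/* Hypothetical illustration of the emitted op sequence:
 *   qemu_ld_i32 val, addr, MO_UW   -- bswap and sign stripped
 *   bswap16_i32 val, val           -- needs zero-extended input
 *   ext16s_i32  val, val           -- restore the stripped sign
 */
static int32_t qemu_ld_besw_fallback(const uint8_t *haddr)
{
    uint32_t val = (uint32_t)haddr[0] | ((uint32_t)haddr[1] << 8); /* LE load */
    val = ((val >> 8) | (val << 8)) & 0xffff;                      /* bswap16 */
    return (int16_t)val;                                           /* ext16s */
}
```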
1 | This will move the assert for success from within (subroutines of) | 1 | Add various bits and pieces related mostly to load and store |
---|---|---|---|
2 | patch_reloc into the callers. It will also let new code do something | 2 | operations. In that context, logic, compare, and splat Altivec |
3 | different when a relocation is out of range. | 3 | instructions are used, and therefore the support for emitting |
4 | them is included in this patch too. | ||
4 | 5 | ||
5 | For the moment, all backends are trivially converted to return true. | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
8 | --- | ||
9 | tcg/ppc/tcg-target.h | 6 +- | ||
10 | tcg/ppc/tcg-target.inc.c | 472 ++++++++++++++++++++++++++++++++++++--- | ||
11 | 2 files changed, 442 insertions(+), 36 deletions(-) | ||
6 | 12 | ||
7 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 13 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h |
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | --- | ||
10 | tcg/aarch64/tcg-target.inc.c | 3 ++- | ||
11 | tcg/arm/tcg-target.inc.c | 3 ++- | ||
12 | tcg/i386/tcg-target.inc.c | 3 ++- | ||
13 | tcg/mips/tcg-target.inc.c | 3 ++- | ||
14 | tcg/ppc/tcg-target.inc.c | 3 ++- | ||
15 | tcg/s390/tcg-target.inc.c | 3 ++- | ||
16 | tcg/sparc/tcg-target.inc.c | 5 +++-- | ||
17 | tcg/tcg.c | 8 +++++--- | ||
18 | tcg/tci/tcg-target.inc.c | 3 ++- | ||
19 | 9 files changed, 22 insertions(+), 12 deletions(-) | ||
20 | |||
21 | diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c | ||
22 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
23 | --- a/tcg/aarch64/tcg-target.inc.c | 15 | --- a/tcg/ppc/tcg-target.h |
24 | +++ b/tcg/aarch64/tcg-target.inc.c | 16 | +++ b/tcg/ppc/tcg-target.h |
25 | @@ -XXX,XX +XXX,XX @@ static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target) | 17 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; |
26 | *code_ptr = deposit32(*code_ptr, 5, 19, offset); | 18 | #define TCG_TARGET_HAS_v128 have_altivec |
27 | } | 19 | #define TCG_TARGET_HAS_v256 0 |
28 | 20 | ||
29 | -static inline void patch_reloc(tcg_insn_unit *code_ptr, int type, | 21 | -#define TCG_TARGET_HAS_andc_vec 0 |
30 | +static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type, | 22 | +#define TCG_TARGET_HAS_andc_vec 1 |
31 | intptr_t value, intptr_t addend) | 23 | #define TCG_TARGET_HAS_orc_vec 0 |
32 | { | 24 | -#define TCG_TARGET_HAS_not_vec 0 |
33 | tcg_debug_assert(addend == 0); | 25 | +#define TCG_TARGET_HAS_not_vec 1 |
34 | @@ -XXX,XX +XXX,XX @@ static inline void patch_reloc(tcg_insn_unit *code_ptr, int type, | 26 | #define TCG_TARGET_HAS_neg_vec 0 |
35 | default: | 27 | #define TCG_TARGET_HAS_abs_vec 0 |
36 | tcg_abort(); | 28 | #define TCG_TARGET_HAS_shi_vec 0 |
37 | } | 29 | #define TCG_TARGET_HAS_shs_vec 0 |
38 | + return true; | 30 | #define TCG_TARGET_HAS_shv_vec 0 |
39 | } | 31 | -#define TCG_TARGET_HAS_cmp_vec 0 |
40 | 32 | +#define TCG_TARGET_HAS_cmp_vec 1 | |
41 | #define TCG_CT_CONST_AIMM 0x100 | 33 | #define TCG_TARGET_HAS_mul_vec 0 |
42 | diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c | 34 | #define TCG_TARGET_HAS_sat_vec 0 |
43 | index XXXXXXX..XXXXXXX 100644 | 35 | #define TCG_TARGET_HAS_minmax_vec 0 |
44 | --- a/tcg/arm/tcg-target.inc.c | ||
45 | +++ b/tcg/arm/tcg-target.inc.c | ||
46 | @@ -XXX,XX +XXX,XX @@ static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target) | ||
47 | *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff); | ||
48 | } | ||
49 | |||
50 | -static void patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
51 | +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
52 | intptr_t value, intptr_t addend) | ||
53 | { | ||
54 | tcg_debug_assert(addend == 0); | ||
55 | @@ -XXX,XX +XXX,XX @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
56 | } else { | ||
57 | g_assert_not_reached(); | ||
58 | } | ||
59 | + return true; | ||
60 | } | ||
61 | |||
62 | #define TCG_CT_CONST_ARM 0x100 | ||
63 | diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c | ||
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/tcg/i386/tcg-target.inc.c | ||
66 | +++ b/tcg/i386/tcg-target.inc.c | ||
67 | @@ -XXX,XX +XXX,XX @@ static bool have_lzcnt; | ||
68 | |||
69 | static tcg_insn_unit *tb_ret_addr; | ||
70 | |||
71 | -static void patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
72 | +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
73 | intptr_t value, intptr_t addend) | ||
74 | { | ||
75 | value += addend; | ||
76 | @@ -XXX,XX +XXX,XX @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
77 | default: | ||
78 | tcg_abort(); | ||
79 | } | ||
80 | + return true; | ||
81 | } | ||
82 | |||
83 | #if TCG_TARGET_REG_BITS == 64 | ||
84 | diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c | ||
85 | index XXXXXXX..XXXXXXX 100644 | ||
86 | --- a/tcg/mips/tcg-target.inc.c | ||
87 | +++ b/tcg/mips/tcg-target.inc.c | ||
88 | @@ -XXX,XX +XXX,XX @@ static inline void reloc_26(tcg_insn_unit *pc, tcg_insn_unit *target) | ||
89 | *pc = deposit32(*pc, 0, 26, reloc_26_val(pc, target)); | ||
90 | } | ||
91 | |||
92 | -static void patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
93 | +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
94 | intptr_t value, intptr_t addend) | ||
95 | { | ||
96 | tcg_debug_assert(type == R_MIPS_PC16); | ||
97 | tcg_debug_assert(addend == 0); | ||
98 | reloc_pc16(code_ptr, (tcg_insn_unit *)value); | ||
99 | + return true; | ||
100 | } | ||
101 | |||
102 | #define TCG_CT_CONST_ZERO 0x100 | ||
103 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 36 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c |
104 | index XXXXXXX..XXXXXXX 100644 | 37 | index XXXXXXX..XXXXXXX 100644 |
105 | --- a/tcg/ppc/tcg-target.inc.c | 38 | --- a/tcg/ppc/tcg-target.inc.c |
106 | +++ b/tcg/ppc/tcg-target.inc.c | 39 | +++ b/tcg/ppc/tcg-target.inc.c |
107 | @@ -XXX,XX +XXX,XX @@ static const uint32_t tcg_to_isel[] = { | 40 | @@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct, |
108 | [TCG_COND_GTU] = ISEL | BC_(7, CR_GT), | 41 | ct->ct |= TCG_CT_REG; |
109 | }; | 42 | ct->u.regs = 0xffffffff; |
110 | 43 | break; | |
111 | -static void patch_reloc(tcg_insn_unit *code_ptr, int type, | 44 | + case 'v': |
112 | +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | 45 | + ct->ct |= TCG_CT_REG; |
46 | + ct->u.regs = 0xffffffff00000000ull; | ||
47 | + break; | ||
48 | case 'L': /* qemu_ld constraint */ | ||
49 | ct->ct |= TCG_CT_REG; | ||
50 | ct->u.regs = 0xffffffff; | ||
51 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
52 | |||
53 | #define NOP ORI /* ori 0,0,0 */ | ||
54 | |||
55 | +#define LVX XO31(103) | ||
56 | +#define LVEBX XO31(7) | ||
57 | +#define LVEHX XO31(39) | ||
58 | +#define LVEWX XO31(71) | ||
59 | + | ||
60 | +#define STVX XO31(231) | ||
61 | +#define STVEWX XO31(199) | ||
62 | + | ||
63 | +#define VCMPEQUB VX4(6) | ||
64 | +#define VCMPEQUH VX4(70) | ||
65 | +#define VCMPEQUW VX4(134) | ||
66 | +#define VCMPGTSB VX4(774) | ||
67 | +#define VCMPGTSH VX4(838) | ||
68 | +#define VCMPGTSW VX4(902) | ||
69 | +#define VCMPGTUB VX4(518) | ||
70 | +#define VCMPGTUH VX4(582) | ||
71 | +#define VCMPGTUW VX4(646) | ||
72 | + | ||
73 | +#define VAND VX4(1028) | ||
74 | +#define VANDC VX4(1092) | ||
75 | +#define VNOR VX4(1284) | ||
76 | +#define VOR VX4(1156) | ||
77 | +#define VXOR VX4(1220) | ||
78 | + | ||
79 | +#define VSPLTB VX4(524) | ||
80 | +#define VSPLTH VX4(588) | ||
81 | +#define VSPLTW VX4(652) | ||
82 | +#define VSPLTISB VX4(780) | ||
83 | +#define VSPLTISH VX4(844) | ||
84 | +#define VSPLTISW VX4(908) | ||
85 | + | ||
86 | +#define VSLDOI VX4(44) | ||
87 | + | ||
88 | #define RT(r) ((r)<<21) | ||
89 | #define RS(r) ((r)<<21) | ||
90 | #define RA(r) ((r)<<16) | ||
91 | @@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
113 | intptr_t value, intptr_t addend) | 92 | intptr_t value, intptr_t addend) |
114 | { | 93 | { |
115 | tcg_insn_unit *target; | 94 | tcg_insn_unit *target; |
116 | @@ -XXX,XX +XXX,XX @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type, | 95 | + int16_t lo; |
96 | + int32_t hi; | ||
97 | |||
98 | value += addend; | ||
99 | target = (tcg_insn_unit *)value; | ||
100 | @@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
101 | } | ||
102 | *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc); | ||
103 | break; | ||
104 | + case R_PPC_ADDR32: | ||
105 | + /* | ||
106 | + * We are abusing this relocation type. Again, this points to | ||
107 | + * a pair of insns, lis + load. This is an absolute address | ||
108 | + * relocation for PPC32 so the lis cannot be removed. | ||
109 | + */ | ||
110 | + lo = value; | ||
111 | + hi = value - lo; | ||
112 | + if (hi + lo != value) { | ||
113 | + return false; | ||
114 | + } | ||
115 | + code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16); | ||
116 | + code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo); | ||
117 | + break; | ||
117 | default: | 118 | default: |
118 | g_assert_not_reached(); | 119 | g_assert_not_reached(); |
119 | } | 120 | } |
121 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | ||
122 | |||
123 | static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) | ||
124 | { | ||
125 | - tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); | ||
126 | - if (ret != arg) { | ||
127 | - tcg_out32(s, OR | SAB(arg, ret, arg)); | ||
128 | + if (ret == arg) { | ||
129 | + return true; | ||
130 | + } | ||
131 | + switch (type) { | ||
132 | + case TCG_TYPE_I64: | ||
133 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
134 | + /* fallthru */ | ||
135 | + case TCG_TYPE_I32: | ||
136 | + if (ret < TCG_REG_V0 && arg < TCG_REG_V0) { | ||
137 | + tcg_out32(s, OR | SAB(arg, ret, arg)); | ||
138 | + break; | ||
139 | + } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) { | ||
140 | + /* Altivec does not support vector/integer moves. */ | ||
141 | + return false; | ||
142 | + } | ||
143 | + /* fallthru */ | ||
144 | + case TCG_TYPE_V64: | ||
145 | + case TCG_TYPE_V128: | ||
146 | + tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0); | ||
147 | + tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg)); | ||
148 | + break; | ||
149 | + default: | ||
150 | + g_assert_not_reached(); | ||
151 | } | ||
152 | return true; | ||
153 | } | ||
154 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, | ||
155 | static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, | ||
156 | tcg_target_long val) | ||
157 | { | ||
158 | - g_assert_not_reached(); | ||
159 | + uint32_t load_insn; | ||
160 | + int rel, low; | ||
161 | + intptr_t add; | ||
162 | + | ||
163 | + low = (int8_t)val; | ||
164 | + if (low >= -16 && low < 16) { | ||
165 | + if (val == (tcg_target_long)dup_const(MO_8, low)) { | ||
166 | + tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16)); | ||
167 | + return; | ||
168 | + } | ||
169 | + if (val == (tcg_target_long)dup_const(MO_16, low)) { | ||
170 | + tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16)); | ||
171 | + return; | ||
172 | + } | ||
173 | + if (val == (tcg_target_long)dup_const(MO_32, low)) { | ||
174 | + tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16)); | ||
175 | + return; | ||
176 | + } | ||
177 | + } | ||
178 | + | ||
179 | + /* | ||
180 | + * Otherwise we must load the value from the constant pool. | ||
181 | + */ | ||
182 | + if (USE_REG_TB) { | ||
183 | + rel = R_PPC_ADDR16; | ||
184 | + add = -(intptr_t)s->code_gen_ptr; | ||
185 | + } else { | ||
186 | + rel = R_PPC_ADDR32; | ||
187 | + add = 0; | ||
188 | + } | ||
189 | + | ||
190 | + load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); | ||
191 | + if (TCG_TARGET_REG_BITS == 64) { | ||
192 | + new_pool_l2(s, rel, s->code_ptr, add, val, val); | ||
193 | + } else { | ||
194 | + new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val); | ||
195 | + } | ||
196 | + | ||
197 | + if (USE_REG_TB) { | ||
198 | + tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0)); | ||
199 | + load_insn |= RA(TCG_REG_TB); | ||
200 | + } else { | ||
201 | + tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0)); | ||
202 | + tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0)); | ||
203 | + } | ||
204 | + tcg_out32(s, load_insn); | ||
205 | } | ||
206 | |||
207 | static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, | ||
208 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | ||
209 | align = 3; | ||
210 | /* FALLTHRU */ | ||
211 | default: | ||
212 | - if (rt != TCG_REG_R0) { | ||
213 | + if (rt > TCG_REG_R0 && rt < TCG_REG_V0) { | ||
214 | rs = rt; | ||
215 | break; | ||
216 | } | ||
217 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | ||
218 | } | ||
219 | |||
220 | /* For unaligned, or very large offsets, use the indexed form. */ | ||
221 | - if (offset & align || offset != (int32_t)offset) { | ||
222 | + if (offset & align || offset != (int32_t)offset || opi == 0) { | ||
223 | if (rs == base) { | ||
224 | rs = TCG_REG_R0; | ||
225 | } | ||
226 | tcg_debug_assert(!is_store || rs != rt); | ||
227 | tcg_out_movi(s, TCG_TYPE_PTR, rs, orig); | ||
228 | - tcg_out32(s, opx | TAB(rt, base, rs)); | ||
229 | + tcg_out32(s, opx | TAB(rt & 31, base, rs)); | ||
230 | return; | ||
231 | } | ||
232 | |||
233 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | ||
234 | base = rs; | ||
235 | } | ||
236 | if (opi != ADDI || base != rt || l0 != 0) { | ||
237 | - tcg_out32(s, opi | TAI(rt, base, l0)); | ||
238 | + tcg_out32(s, opi | TAI(rt & 31, base, l0)); | ||
239 | } | ||
240 | } | ||
241 | |||
242 | -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | ||
243 | - TCGReg arg1, intptr_t arg2) | ||
244 | +static void tcg_out_vsldoi(TCGContext *s, TCGReg ret, | ||
245 | + TCGReg va, TCGReg vb, int shb) | ||
246 | { | ||
247 | - int opi, opx; | ||
248 | - | ||
249 | - tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); | ||
250 | - if (type == TCG_TYPE_I32) { | ||
251 | - opi = LWZ, opx = LWZX; | ||
252 | - } else { | ||
253 | - opi = LD, opx = LDX; | ||
254 | - } | ||
255 | - tcg_out_mem_long(s, opi, opx, ret, arg1, arg2); | ||
256 | + tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6)); | ||
257 | } | ||
258 | |||
259 | -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | ||
260 | - TCGReg arg1, intptr_t arg2) | ||
261 | +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | ||
262 | + TCGReg base, intptr_t offset) | ||
263 | { | ||
264 | - int opi, opx; | ||
265 | + int shift; | ||
266 | |||
267 | - tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); | ||
268 | - if (type == TCG_TYPE_I32) { | ||
269 | - opi = STW, opx = STWX; | ||
270 | - } else { | ||
271 | - opi = STD, opx = STDX; | ||
272 | + switch (type) { | ||
273 | + case TCG_TYPE_I32: | ||
274 | + if (ret < TCG_REG_V0) { | ||
275 | + tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset); | ||
276 | + break; | ||
277 | + } | ||
278 | + tcg_debug_assert((offset & 3) == 0); | ||
279 | + tcg_out_mem_long(s, 0, LVEWX, ret, base, offset); | ||
280 | + shift = (offset - 4) & 0xc; | ||
281 | + if (shift) { | ||
282 | + tcg_out_vsldoi(s, ret, ret, ret, shift); | ||
283 | + } | ||
284 | + break; | ||
285 | + case TCG_TYPE_I64: | ||
286 | + if (ret < TCG_REG_V0) { | ||
287 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
288 | + tcg_out_mem_long(s, LD, LDX, ret, base, offset); | ||
289 | + break; | ||
290 | + } | ||
291 | + /* fallthru */ | ||
292 | + case TCG_TYPE_V64: | ||
293 | + tcg_debug_assert(ret >= TCG_REG_V0); | ||
294 | + tcg_debug_assert((offset & 7) == 0); | ||
295 | + tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16); | ||
296 | + if (offset & 8) { | ||
297 | + tcg_out_vsldoi(s, ret, ret, ret, 8); | ||
298 | + } | ||
299 | + break; | ||
300 | + case TCG_TYPE_V128: | ||
301 | + tcg_debug_assert(ret >= TCG_REG_V0); | ||
302 | + tcg_debug_assert((offset & 15) == 0); | ||
303 | + tcg_out_mem_long(s, 0, LVX, ret, base, offset); | ||
304 | + break; | ||
305 | + default: | ||
306 | + g_assert_not_reached(); | ||
307 | + } | ||
308 | +} | ||
309 | + | ||
310 | +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | ||
311 | + TCGReg base, intptr_t offset) | ||
312 | +{ | ||
313 | + int shift; | ||
314 | + | ||
315 | + switch (type) { | ||
316 | + case TCG_TYPE_I32: | ||
317 | + if (arg < TCG_REG_V0) { | ||
318 | + tcg_out_mem_long(s, STW, STWX, arg, base, offset); | ||
319 | + break; | ||
320 | + } | ||
321 | + tcg_debug_assert((offset & 3) == 0); | ||
322 | + shift = (offset - 4) & 0xc; | ||
323 | + if (shift) { | ||
324 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift); | ||
325 | + arg = TCG_VEC_TMP1; | ||
326 | + } | ||
327 | + tcg_out_mem_long(s, 0, STVEWX, arg, base, offset); | ||
328 | + break; | ||
329 | + case TCG_TYPE_I64: | ||
330 | + if (arg < TCG_REG_V0) { | ||
331 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
332 | + tcg_out_mem_long(s, STD, STDX, arg, base, offset); | ||
333 | + break; | ||
334 | + } | ||
335 | + /* fallthru */ | ||
336 | + case TCG_TYPE_V64: | ||
337 | + tcg_debug_assert(arg >= TCG_REG_V0); | ||
338 | + tcg_debug_assert((offset & 7) == 0); | ||
339 | + if (offset & 8) { | ||
340 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8); | ||
341 | + arg = TCG_VEC_TMP1; | ||
342 | + } | ||
343 | + tcg_out_mem_long(s, 0, STVEWX, arg, base, offset); | ||
344 | + tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4); | ||
345 | + break; | ||
346 | + case TCG_TYPE_V128: | ||
347 | + tcg_debug_assert(arg >= TCG_REG_V0); | ||
348 | + tcg_out_mem_long(s, 0, STVX, arg, base, offset); | ||
349 | + break; | ||
350 | + default: | ||
351 | + g_assert_not_reached(); | ||
352 | } | ||
353 | - tcg_out_mem_long(s, opi, opx, arg, arg1, arg2); | ||
354 | } | ||
355 | |||
356 | static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, | ||
357 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, | ||
358 | |||
359 | int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
360 | { | ||
361 | - g_assert_not_reached(); | ||
362 | + switch (opc) { | ||
363 | + case INDEX_op_and_vec: | ||
364 | + case INDEX_op_or_vec: | ||
365 | + case INDEX_op_xor_vec: | ||
366 | + case INDEX_op_andc_vec: | ||
367 | + case INDEX_op_not_vec: | ||
368 | + return 1; | ||
369 | + case INDEX_op_cmp_vec: | ||
370 | + return vece <= MO_32 ? -1 : 0; | ||
371 | + default: | ||
372 | + return 0; | ||
373 | + } | ||
374 | } | ||
375 | |||
376 | static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, | ||
377 | TCGReg dst, TCGReg src) | ||
378 | { | ||
379 | - g_assert_not_reached(); | ||
380 | + tcg_debug_assert(dst >= TCG_REG_V0); | ||
381 | + tcg_debug_assert(src >= TCG_REG_V0); | ||
382 | + | ||
383 | + /* | ||
384 | + * Recall we use (or emulate) VSX integer loads, so the integer is | ||
385 | + * right justified within the left (zero-index) double-word. | ||
386 | + */ | ||
387 | + switch (vece) { | ||
388 | + case MO_8: | ||
389 | + tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16)); | ||
390 | + break; | ||
391 | + case MO_16: | ||
392 | + tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16)); | ||
393 | + break; | ||
394 | + case MO_32: | ||
395 | + tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16)); | ||
396 | + break; | ||
397 | + case MO_64: | ||
398 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8); | ||
399 | + tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8); | ||
400 | + break; | ||
401 | + default: | ||
402 | + g_assert_not_reached(); | ||
403 | + } | ||
120 | + return true; | 404 | + return true; |
121 | } | 405 | } |
122 | 406 | ||
123 | static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | 407 | static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, |
124 | diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c | 408 | TCGReg out, TCGReg base, intptr_t offset) |
125 | index XXXXXXX..XXXXXXX 100644 | 409 | { |
126 | --- a/tcg/s390/tcg-target.inc.c | 410 | - g_assert_not_reached(); |
127 | +++ b/tcg/s390/tcg-target.inc.c | 411 | + int elt; |
128 | @@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[16] = { | 412 | + |
129 | static tcg_insn_unit *tb_ret_addr; | 413 | + tcg_debug_assert(out >= TCG_REG_V0); |
130 | uint64_t s390_facilities; | 414 | + switch (vece) { |
131 | 415 | + case MO_8: | |
132 | -static void patch_reloc(tcg_insn_unit *code_ptr, int type, | 416 | + tcg_out_mem_long(s, 0, LVEBX, out, base, offset); |
133 | +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | 417 | + elt = extract32(offset, 0, 4); |
134 | intptr_t value, intptr_t addend) | 418 | +#ifndef HOST_WORDS_BIGENDIAN |
135 | { | 419 | + elt ^= 15; |
136 | intptr_t pcrel2; | 420 | +#endif |
137 | @@ -XXX,XX +XXX,XX @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type, | 421 | + tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16)); |
422 | + break; | ||
423 | + case MO_16: | ||
424 | + tcg_debug_assert((offset & 1) == 0); | ||
425 | + tcg_out_mem_long(s, 0, LVEHX, out, base, offset); | ||
426 | + elt = extract32(offset, 1, 3); | ||
427 | +#ifndef HOST_WORDS_BIGENDIAN | ||
428 | + elt ^= 7; | ||
429 | +#endif | ||
430 | + tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16)); | ||
431 | + break; | ||
432 | + case MO_32: | ||
433 | + tcg_debug_assert((offset & 3) == 0); | ||
434 | + tcg_out_mem_long(s, 0, LVEWX, out, base, offset); | ||
435 | + elt = extract32(offset, 2, 2); | ||
436 | +#ifndef HOST_WORDS_BIGENDIAN | ||
437 | + elt ^= 3; | ||
438 | +#endif | ||
439 | + tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16)); | ||
440 | + break; | ||
441 | + case MO_64: | ||
442 | + tcg_debug_assert((offset & 7) == 0); | ||
443 | + tcg_out_mem_long(s, 0, LVX, out, base, offset & -16); | ||
444 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8); | ||
445 | + elt = extract32(offset, 3, 1); | ||
446 | +#ifndef HOST_WORDS_BIGENDIAN | ||
447 | + elt = !elt; | ||
448 | +#endif | ||
449 | + if (elt) { | ||
450 | + tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8); | ||
451 | + } else { | ||
452 | + tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8); | ||
453 | + } | ||
454 | + break; | ||
455 | + default: | ||
456 | + g_assert_not_reached(); | ||
457 | + } | ||
458 | + return true; | ||
459 | } | ||
460 | |||
461 | static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
462 | unsigned vecl, unsigned vece, | ||
463 | const TCGArg *args, const int *const_args) | ||
464 | { | ||
465 | - g_assert_not_reached(); | ||
466 | + static const uint32_t | ||
467 | + eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, | ||
468 | + gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, | ||
469 | + gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }; | ||
470 | + | ||
471 | + TCGType type = vecl + TCG_TYPE_V64; | ||
472 | + TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | ||
473 | + uint32_t insn; | ||
474 | + | ||
475 | + switch (opc) { | ||
476 | + case INDEX_op_ld_vec: | ||
477 | + tcg_out_ld(s, type, a0, a1, a2); | ||
478 | + return; | ||
479 | + case INDEX_op_st_vec: | ||
480 | + tcg_out_st(s, type, a0, a1, a2); | ||
481 | + return; | ||
482 | + case INDEX_op_dupm_vec: | ||
483 | + tcg_out_dupm_vec(s, type, vece, a0, a1, a2); | ||
484 | + return; | ||
485 | + | ||
486 | + case INDEX_op_and_vec: | ||
487 | + insn = VAND; | ||
488 | + break; | ||
489 | + case INDEX_op_or_vec: | ||
490 | + insn = VOR; | ||
491 | + break; | ||
492 | + case INDEX_op_xor_vec: | ||
493 | + insn = VXOR; | ||
494 | + break; | ||
495 | + case INDEX_op_andc_vec: | ||
496 | + insn = VANDC; | ||
497 | + break; | ||
498 | + case INDEX_op_not_vec: | ||
499 | + insn = VNOR; | ||
500 | + a2 = a1; | ||
501 | + break; | ||
502 | + | ||
503 | + case INDEX_op_cmp_vec: | ||
504 | + switch (args[3]) { | ||
505 | + case TCG_COND_EQ: | ||
506 | + insn = eq_op[vece]; | ||
507 | + break; | ||
508 | + case TCG_COND_GT: | ||
509 | + insn = gts_op[vece]; | ||
510 | + break; | ||
511 | + case TCG_COND_GTU: | ||
512 | + insn = gtu_op[vece]; | ||
513 | + break; | ||
514 | + default: | ||
515 | + g_assert_not_reached(); | ||
516 | + } | ||
517 | + break; | ||
518 | + | ||
519 | + case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ | ||
520 | + case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */ | ||
521 | + case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ | ||
522 | + default: | ||
523 | + g_assert_not_reached(); | ||
524 | + } | ||
525 | + | ||
526 | + tcg_debug_assert(insn != 0); | ||
527 | + tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2)); | ||
528 | +} | ||
529 | + | ||
530 | +static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | ||
531 | + TCGv_vec v1, TCGv_vec v2, TCGCond cond) | ||
532 | +{ | ||
533 | + bool need_swap = false, need_inv = false; | ||
534 | + | ||
535 | + tcg_debug_assert(vece <= MO_32); | ||
536 | + | ||
537 | + switch (cond) { | ||
538 | + case TCG_COND_EQ: | ||
539 | + case TCG_COND_GT: | ||
540 | + case TCG_COND_GTU: | ||
541 | + break; | ||
542 | + case TCG_COND_NE: | ||
543 | + case TCG_COND_LE: | ||
544 | + case TCG_COND_LEU: | ||
545 | + need_inv = true; | ||
546 | + break; | ||
547 | + case TCG_COND_LT: | ||
548 | + case TCG_COND_LTU: | ||
549 | + need_swap = true; | ||
550 | + break; | ||
551 | + case TCG_COND_GE: | ||
552 | + case TCG_COND_GEU: | ||
553 | + need_swap = need_inv = true; | ||
554 | + break; | ||
555 | + default: | ||
556 | + g_assert_not_reached(); | ||
557 | + } | ||
558 | + | ||
559 | + if (need_inv) { | ||
560 | + cond = tcg_invert_cond(cond); | ||
561 | + } | ||
562 | + if (need_swap) { | ||
563 | + TCGv_vec t1; | ||
564 | + t1 = v1, v1 = v2, v2 = t1; | ||
565 | + cond = tcg_swap_cond(cond); | ||
566 | + } | ||
567 | + | ||
568 | + vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0), | ||
569 | + tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond); | ||
570 | + | ||
571 | + if (need_inv) { | ||
572 | + tcg_gen_not_vec(vece, v0, v0); | ||
573 | + } | ||
574 | } | ||
575 | |||
576 | void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, | ||
577 | TCGArg a0, ...) | ||
578 | { | ||
579 | - g_assert_not_reached(); | ||
580 | + va_list va; | ||
581 | + TCGv_vec v0, v1, v2; | ||
582 | + | ||
583 | + va_start(va, a0); | ||
584 | + v0 = temp_tcgv_vec(arg_temp(a0)); | ||
585 | + v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); | ||
586 | + v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); | ||
587 | + | ||
588 | + switch (opc) { | ||
589 | + case INDEX_op_cmp_vec: | ||
590 | + expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); | ||
591 | + break; | ||
592 | + default: | ||
593 | + g_assert_not_reached(); | ||
594 | + } | ||
595 | + va_end(va); | ||
596 | } | ||
597 | |||
598 | static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
599 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
600 | = { .args_ct_str = { "r", "r", "r", "r", "rI", "rZM" } }; | ||
601 | static const TCGTargetOpDef sub2 | ||
602 | = { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } }; | ||
603 | + static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } }; | ||
604 | + static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } }; | ||
605 | + static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } }; | ||
606 | |||
607 | switch (op) { | ||
608 | case INDEX_op_goto_ptr: | ||
609 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
610 | return (TCG_TARGET_REG_BITS == 64 ? &S_S | ||
611 | : TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S); | ||
612 | |||
613 | + case INDEX_op_and_vec: | ||
614 | + case INDEX_op_or_vec: | ||
615 | + case INDEX_op_xor_vec: | ||
616 | + case INDEX_op_andc_vec: | ||
617 | + case INDEX_op_orc_vec: | ||
618 | + case INDEX_op_cmp_vec: | ||
619 | + return &v_v_v; | ||
620 | + case INDEX_op_not_vec: | ||
621 | + case INDEX_op_dup_vec: | ||
622 | + return &v_v; | ||
623 | + case INDEX_op_ld_vec: | ||
624 | + case INDEX_op_st_vec: | ||
625 | + case INDEX_op_dupm_vec: | ||
626 | + return &v_r; | ||
627 | + | ||
138 | default: | 628 | default: |
139 | g_assert_not_reached(); | 629 | return NULL; |
140 | } | 630 | } |
141 | + return true; | ||
142 | } | ||
143 | |||
144 | /* parse target specific constraints */ | ||
145 | diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c | ||
146 | index XXXXXXX..XXXXXXX 100644 | ||
147 | --- a/tcg/sparc/tcg-target.inc.c | ||
148 | +++ b/tcg/sparc/tcg-target.inc.c | ||
149 | @@ -XXX,XX +XXX,XX @@ static inline int check_fit_i32(int32_t val, unsigned int bits) | ||
150 | # define check_fit_ptr check_fit_i32 | ||
151 | #endif | ||
152 | |||
153 | -static void patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
154 | +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
155 | intptr_t value, intptr_t addend) | ||
156 | { | ||
157 | uint32_t insn = *code_ptr; | ||
158 | @@ -XXX,XX +XXX,XX @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
159 | /* Note that we're abusing this reloc type for our own needs. */ | ||
160 | code_ptr[0] = deposit32(code_ptr[0], 0, 22, value >> 10); | ||
161 | code_ptr[1] = deposit32(code_ptr[1], 0, 10, value); | ||
162 | - return; | ||
163 | + return true; | ||
164 | default: | ||
165 | g_assert_not_reached(); | ||
166 | } | ||
167 | |||
168 | *code_ptr = insn; | ||
169 | + return true; | ||
170 | } | ||
171 | |||
172 | /* parse target specific constraints */ | ||
173 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
174 | index XXXXXXX..XXXXXXX 100644 | ||
175 | --- a/tcg/tcg.c | ||
176 | +++ b/tcg/tcg.c | ||
177 | @@ -XXX,XX +XXX,XX @@ | ||
178 | static void tcg_target_init(TCGContext *s); | ||
179 | static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode); | ||
180 | static void tcg_target_qemu_prologue(TCGContext *s); | ||
181 | -static void patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
182 | +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
183 | intptr_t value, intptr_t addend); | ||
184 | |||
185 | /* The CIE and FDE header definitions will be common to all hosts. */ | ||
186 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type, | ||
187 | /* FIXME: This may break relocations on RISC targets that | ||
188 | modify instruction fields in place. The caller may not have | ||
189 | written the initial value. */ | ||
190 | - patch_reloc(code_ptr, type, l->u.value, addend); | ||
191 | + bool ok = patch_reloc(code_ptr, type, l->u.value, addend); | ||
192 | + tcg_debug_assert(ok); | ||
193 | } else { | ||
194 | /* add a new relocation entry */ | ||
195 | r = tcg_malloc(sizeof(TCGRelocation)); | ||
196 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr) | ||
197 | tcg_debug_assert(!l->has_value); | ||
198 | |||
199 | for (r = l->u.first_reloc; r != NULL; r = r->next) { | ||
200 | - patch_reloc(r->ptr, r->type, value, r->addend); | ||
201 | + bool ok = patch_reloc(r->ptr, r->type, value, r->addend); | ||
202 | + tcg_debug_assert(ok); | ||
203 | } | ||
204 | |||
205 | l->has_value = 1; | ||
206 | diff --git a/tcg/tci/tcg-target.inc.c b/tcg/tci/tcg-target.inc.c | ||
207 | index XXXXXXX..XXXXXXX 100644 | ||
208 | --- a/tcg/tci/tcg-target.inc.c | ||
209 | +++ b/tcg/tci/tcg-target.inc.c | ||
210 | @@ -XXX,XX +XXX,XX @@ static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { | ||
211 | }; | ||
212 | #endif | ||
213 | |||
214 | -static void patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
215 | +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
216 | intptr_t value, intptr_t addend) | ||
217 | { | ||
218 | /* tcg_out_reloc always uses the same type, addend. */ | ||
219 | @@ -XXX,XX +XXX,XX @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
220 | } else { | ||
221 | tcg_patch64(code_ptr, value); | ||
222 | } | ||
223 | + return true; | ||
224 | } | ||
225 | |||
226 | /* Parse target specific constraints. */ | ||
227 | -- | 631 | -- |
228 | 2.17.2 | 632 | 2.17.1 |
229 | 633 | ||
230 | 634 | diff view generated by jsdifflib |
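The R_PPC_ADDR32 case in the ppc hunk above depends on a hi/lo split that absorbs the sign extension addi applies to its 16-bit operand. A plain-C sketch of the same arithmetic follows; the helper name is made up, and the boolean result mirrors the new patch_reloc contract, where false reports an out-of-range relocation instead of asserting.

```c
#include <stdbool.h>
#include <stdint.h>

/* Sketch only: split value into hi + lo such that lis (hi >> 16)
 * followed by addi (lo, sign-extended) reconstructs it exactly. */
static bool split_addr32(intptr_t value, int32_t *hi, int16_t *lo)
{
    *lo = (int16_t)value;             /* low 16 bits; addi sign-extends these */
    *hi = (int32_t)(value - *lo);     /* high part with the sign carry folded in */
    return (intptr_t)*hi + *lo == value;  /* false: needs more than 32 bits */
}
```

For example, 0x12348000 splits as lo = -0x8000 and hi = 0x12350000, so the lis operand becomes 0x1235.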
1 | Somehow we forgot these operations, once upon a time. | 1 | Add support for vector maximum/minimum using Altivec instructions |
---|---|---|---|
2 | This will allow immediate stores to have their bswap | 2 | VMAXSB, VMAXSH, VMAXSW, VMAXUB, VMAXUH, VMAXUW, and |
3 | optimized away. | 3 | VMINSB, VMINSH, VMINSW, VMINUB, VMINUH, VMINUW. |
4 | 4 | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | --- | 7 | --- |
7 | tcg/optimize.c | 12 ++++++++++++ | 8 | tcg/ppc/tcg-target.h | 2 +- |
8 | 1 file changed, 12 insertions(+) | 9 | tcg/ppc/tcg-target.inc.c | 40 +++++++++++++++++++++++++++++++++++++++- |
10 | 2 files changed, 40 insertions(+), 2 deletions(-) | ||
9 | 11 | ||
10 | diff --git a/tcg/optimize.c b/tcg/optimize.c | 12 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h |
11 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/tcg/optimize.c | 14 | --- a/tcg/ppc/tcg-target.h |
13 | +++ b/tcg/optimize.c | 15 | +++ b/tcg/ppc/tcg-target.h |
14 | @@ -XXX,XX +XXX,XX @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) | 16 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; |
15 | CASE_OP_32_64(ext16u): | 17 | #define TCG_TARGET_HAS_cmp_vec 1 |
16 | return (uint16_t)x; | 18 | #define TCG_TARGET_HAS_mul_vec 0 |
17 | 19 | #define TCG_TARGET_HAS_sat_vec 0 | |
18 | + CASE_OP_32_64(bswap16): | 20 | -#define TCG_TARGET_HAS_minmax_vec 0 |
19 | + return bswap16(x); | 21 | +#define TCG_TARGET_HAS_minmax_vec 1 |
22 | #define TCG_TARGET_HAS_bitsel_vec 0 | ||
23 | #define TCG_TARGET_HAS_cmpsel_vec 0 | ||
24 | |||
25 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/tcg/ppc/tcg-target.inc.c | ||
28 | +++ b/tcg/ppc/tcg-target.inc.c | ||
29 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
30 | #define STVX XO31(231) | ||
31 | #define STVEWX XO31(199) | ||
32 | |||
33 | +#define VMAXSB VX4(258) | ||
34 | +#define VMAXSH VX4(322) | ||
35 | +#define VMAXSW VX4(386) | ||
36 | +#define VMAXUB VX4(2) | ||
37 | +#define VMAXUH VX4(66) | ||
38 | +#define VMAXUW VX4(130) | ||
39 | +#define VMINSB VX4(770) | ||
40 | +#define VMINSH VX4(834) | ||
41 | +#define VMINSW VX4(898) | ||
42 | +#define VMINUB VX4(514) | ||
43 | +#define VMINUH VX4(578) | ||
44 | +#define VMINUW VX4(642) | ||
20 | + | 45 | + |
21 | + CASE_OP_32_64(bswap32): | 46 | #define VCMPEQUB VX4(6) |
22 | + return bswap32(x); | 47 | #define VCMPEQUH VX4(70) |
23 | + | 48 | #define VCMPEQUW VX4(134) |
24 | + case INDEX_op_bswap64_i64: | 49 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) |
25 | + return bswap64(x); | 50 | case INDEX_op_andc_vec: |
26 | + | 51 | case INDEX_op_not_vec: |
27 | case INDEX_op_ext_i32_i64: | 52 | return 1; |
28 | case INDEX_op_ext32s_i64: | 53 | + case INDEX_op_smax_vec: |
29 | return (int32_t)x; | 54 | + case INDEX_op_smin_vec: |
30 | @@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s) | 55 | + case INDEX_op_umax_vec: |
31 | CASE_OP_32_64(ext16s): | 56 | + case INDEX_op_umin_vec: |
32 | CASE_OP_32_64(ext16u): | 57 | + return vece <= MO_32; |
33 | CASE_OP_32_64(ctpop): | 58 | case INDEX_op_cmp_vec: |
34 | + CASE_OP_32_64(bswap16): | 59 | return vece <= MO_32 ? -1 : 0; |
35 | + CASE_OP_32_64(bswap32): | 60 | default: |
36 | + case INDEX_op_bswap64_i64: | 61 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, |
37 | case INDEX_op_ext32s_i64: | 62 | static const uint32_t |
38 | case INDEX_op_ext32u_i64: | 63 | eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, |
39 | case INDEX_op_ext_i32_i64: | 64 | gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, |
65 | - gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }; | ||
66 | + gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }, | ||
67 | + umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 }, | ||
68 | + smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 }, | ||
69 | + umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 }, | ||
70 | + smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }; | ||
71 | |||
72 | TCGType type = vecl + TCG_TYPE_V64; | ||
73 | TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | ||
74 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
75 | tcg_out_dupm_vec(s, type, vece, a0, a1, a2); | ||
76 | return; | ||
77 | |||
78 | + case INDEX_op_smin_vec: | ||
79 | + insn = smin_op[vece]; | ||
80 | + break; | ||
81 | + case INDEX_op_umin_vec: | ||
82 | + insn = umin_op[vece]; | ||
83 | + break; | ||
84 | + case INDEX_op_smax_vec: | ||
85 | + insn = smax_op[vece]; | ||
86 | + break; | ||
87 | + case INDEX_op_umax_vec: | ||
88 | + insn = umax_op[vece]; | ||
89 | + break; | ||
90 | case INDEX_op_and_vec: | ||
91 | insn = VAND; | ||
92 | break; | ||
93 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
94 | case INDEX_op_andc_vec: | ||
95 | case INDEX_op_orc_vec: | ||
96 | case INDEX_op_cmp_vec: | ||
97 | + case INDEX_op_smax_vec: | ||
98 | + case INDEX_op_smin_vec: | ||
99 | + case INDEX_op_umax_vec: | ||
100 | + case INDEX_op_umin_vec: | ||
101 | return &v_v_v; | ||
102 | case INDEX_op_not_vec: | ||
103 | case INDEX_op_dup_vec: | ||
40 | -- | 104 | -- |
41 | 2.17.2 | 105 | 2.17.1 |
42 | 106 | ||
43 | 107 | diff view generated by jsdifflib |
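For reference, plain-C equivalents of the byte-swap helpers the new folding cases call (QEMU's own live in include/qemu/bswap.h; the _c suffix here is only to avoid clashing with them). With constant propagation, a movi of 0x11223344 followed by bswap32 collapses to a movi of 0x44332211 at translation time.

```c
#include <stdint.h>

/* Sketch of the helpers behind the new CASE_OP_32_64(bswap*) folds. */
static inline uint16_t bswap16_c(uint16_t x)
{
    return (uint16_t)((x >> 8) | (x << 8));
}

static inline uint32_t bswap32_c(uint32_t x)
{
    return ((x >> 24) & 0x000000ffu) | ((x >> 8) & 0x0000ff00u)
         | ((x << 8) & 0x00ff0000u) | ((x << 24) & 0xff000000u);
}

static inline uint64_t bswap64_c(uint64_t x)
{
    return ((uint64_t)bswap32_c((uint32_t)x) << 32)
         | bswap32_c((uint32_t)(x >> 32));
}
```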
1 | Based on the only current user, Sparc: | 1 | Add support for vector add/subtract using Altivec instructions: |
---|---|---|---|
2 | 2 | VADDUBM, VADDUHM, VADDUWM, VSUBUBM, VSUBUHM, VSUBUWM. | |
3 | New code uses 1 constant that takes 2 insns to create, plus 8. | ||
4 | Old code used 2 constants that took 2 insns to create, plus 9. | ||
5 | The result is a new total of 10 vs an old total of 13. | ||
6 | 3 | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
8 | --- | 6 | --- |
9 | tcg/tcg-op.c | 54 ++++++++++++++++++++++++++-------------------------- | 7 | tcg/ppc/tcg-target.inc.c | 20 ++++++++++++++++++++ |
10 | 1 file changed, 27 insertions(+), 27 deletions(-) | 8 | 1 file changed, 20 insertions(+) |
11 | 9 | ||
12 | diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c | 10 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c |
13 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/tcg/tcg-op.c | 12 | --- a/tcg/ppc/tcg-target.inc.c |
15 | +++ b/tcg/tcg-op.c | 13 | +++ b/tcg/ppc/tcg-target.inc.c |
16 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg) | 14 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, |
17 | if (TCG_TARGET_HAS_bswap32_i32) { | 15 | #define STVX XO31(231) |
18 | tcg_gen_op2_i32(INDEX_op_bswap32_i32, ret, arg); | 16 | #define STVEWX XO31(199) |
19 | } else { | 17 | |
20 | - TCGv_i32 t0, t1; | 18 | +#define VADDUBM VX4(0) |
21 | - t0 = tcg_temp_new_i32(); | 19 | +#define VADDUHM VX4(64) |
22 | - t1 = tcg_temp_new_i32(); | 20 | +#define VADDUWM VX4(128) |
23 | + TCGv_i32 t0 = tcg_temp_new_i32(); | 21 | + |
24 | + TCGv_i32 t1 = tcg_temp_new_i32(); | 22 | +#define VSUBUBM VX4(1024) |
25 | + TCGv_i32 t2 = tcg_const_i32(0x00ff00ff); | 23 | +#define VSUBUHM VX4(1088) |
26 | 24 | +#define VSUBUWM VX4(1152) | |
27 | - tcg_gen_shli_i32(t0, arg, 24); | 25 | + |
28 | + /* arg = abcd */ | 26 | #define VMAXSB VX4(258) |
29 | + tcg_gen_shri_i32(t0, arg, 8); /* t0 = .abc */ | 27 | #define VMAXSH VX4(322) |
30 | + tcg_gen_and_i32(t1, arg, t2); /* t1 = .b.d */ | 28 | #define VMAXSW VX4(386) |
31 | + tcg_gen_and_i32(t0, t0, t2); /* t0 = .a.c */ | 29 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) |
32 | + tcg_temp_free_i32(t2); | 30 | case INDEX_op_andc_vec: |
33 | + tcg_gen_shli_i32(t1, t1, 8); /* t1 = b.d. */ | 31 | case INDEX_op_not_vec: |
34 | + tcg_gen_or_i32(ret, t0, t1); /* ret = badc */ | 32 | return 1; |
35 | 33 | + case INDEX_op_add_vec: | |
36 | - tcg_gen_andi_i32(t1, arg, 0x0000ff00); | 34 | + case INDEX_op_sub_vec: |
37 | - tcg_gen_shli_i32(t1, t1, 8); | 35 | case INDEX_op_smax_vec: |
38 | - tcg_gen_or_i32(t0, t0, t1); | 36 | case INDEX_op_smin_vec: |
39 | + tcg_gen_shri_i32(t0, ret, 16); /* t0 = ..ba */ | 37 | case INDEX_op_umax_vec: |
40 | + tcg_gen_shli_i32(t1, ret, 16); /* t1 = dc.. */ | 38 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, |
41 | + tcg_gen_or_i32(ret, t0, t1); /* ret = dcba */ | 39 | const TCGArg *args, const int *const_args) |
42 | 40 | { | |
43 | - tcg_gen_shri_i32(t1, arg, 8); | 41 | static const uint32_t |
44 | - tcg_gen_andi_i32(t1, t1, 0x0000ff00); | 42 | + add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 }, |
45 | - tcg_gen_or_i32(t0, t0, t1); | 43 | + sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 }, |
46 | - | 44 | eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, |
47 | - tcg_gen_shri_i32(t1, arg, 24); | 45 | gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, |
48 | - tcg_gen_or_i32(ret, t0, t1); | 46 | gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }, |
49 | tcg_temp_free_i32(t0); | 47 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, |
50 | tcg_temp_free_i32(t1); | 48 | tcg_out_dupm_vec(s, type, vece, a0, a1, a2); |
51 | } | 49 | return; |
52 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg) | 50 | |
53 | } else if (TCG_TARGET_HAS_bswap32_i64) { | 51 | + case INDEX_op_add_vec: |
54 | tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg); | 52 | + insn = add_op[vece]; |
55 | } else { | 53 | + break; |
56 | - TCGv_i64 t0, t1; | 54 | + case INDEX_op_sub_vec: |
57 | - t0 = tcg_temp_new_i64(); | 55 | + insn = sub_op[vece]; |
58 | - t1 = tcg_temp_new_i64(); | 56 | + break; |
59 | + TCGv_i64 t0 = tcg_temp_new_i64(); | 57 | case INDEX_op_smin_vec: |
60 | + TCGv_i64 t1 = tcg_temp_new_i64(); | 58 | insn = smin_op[vece]; |
61 | + TCGv_i64 t2 = tcg_const_i64(0x00ff00ff); | 59 | break; |
62 | 60 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | |
63 | - tcg_gen_shli_i64(t0, arg, 24); | 61 | return (TCG_TARGET_REG_BITS == 64 ? &S_S |
64 | - tcg_gen_ext32u_i64(t0, t0); | 62 | : TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S); |
65 | + /* arg = ....abcd */ | 63 | |
66 | + tcg_gen_shri_i64(t0, arg, 8); /* t0 = .....abc */ | 64 | + case INDEX_op_add_vec: |
67 | + tcg_gen_and_i64(t1, arg, t2); /* t1 = .....b.d */ | 65 | + case INDEX_op_sub_vec: |
68 | + tcg_gen_and_i64(t0, t0, t2); /* t0 = .....a.c */ | 66 | case INDEX_op_and_vec: |
69 | + tcg_temp_free_i64(t2); | 67 | case INDEX_op_or_vec: |
70 | + tcg_gen_shli_i64(t1, t1, 8); /* t1 = ....b.d. */ | 68 | case INDEX_op_xor_vec: |
71 | + tcg_gen_or_i64(ret, t0, t1); /* ret = ....badc */ | ||
72 | |||
73 | - tcg_gen_andi_i64(t1, arg, 0x0000ff00); | ||
74 | - tcg_gen_shli_i64(t1, t1, 8); | ||
75 | - tcg_gen_or_i64(t0, t0, t1); | ||
76 | + tcg_gen_shli_i64(t1, ret, 48); /* t1 = dc...... */ | ||
77 | + tcg_gen_shri_i64(t0, ret, 16); /* t0 = ......ba */ | ||
78 | + tcg_gen_shri_i64(t1, t1, 32); /* t1 = ....dc.. */ | ||
79 | + tcg_gen_or_i64(ret, t0, t1); /* ret = ....dcba */ | ||
80 | |||
81 | - tcg_gen_shri_i64(t1, arg, 8); | ||
82 | - tcg_gen_andi_i64(t1, t1, 0x0000ff00); | ||
83 | - tcg_gen_or_i64(t0, t0, t1); | ||
84 | - | ||
85 | - tcg_gen_shri_i64(t1, arg, 24); | ||
86 | - tcg_gen_or_i64(ret, t0, t1); | ||
87 | tcg_temp_free_i64(t0); | ||
88 | tcg_temp_free_i64(t1); | ||
89 | } | ||
90 | -- | 69 | -- |
91 | 2.17.2 | 70 | 2.17.1 |
92 | 71 | ||
93 | 72 | diff view generated by jsdifflib |
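The commented opcode sequence above maps one-for-one onto ordinary integer arithmetic. Below is a sketch for illustration (hypothetical helper, not patch code) of the same swap on a host uint32_t, which also shows why one 0x00ff00ff constant is enough.

```c
#include <stdint.h>

static uint32_t bswap32_masked(uint32_t arg)    /* arg = abcd */
{
    const uint32_t m = 0x00ff00ffu;  /* the single shared constant */
    uint32_t t0 = (arg >> 8) & m;    /*  t0 = .a.c */
    uint32_t t1 = (arg & m) << 8;    /*  t1 = b.d. */
    uint32_t r  = t0 | t1;           /*   r = badc */
    return (r >> 16) | (r << 16);    /* ret = dcba */
}
```

The insn counts in the commit message follow from materializing that constant once (two insns on Sparc) and sharing it between both ANDs.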
1 | This helps preserve the invariant that all TCG_TYPE_I32 values | 1 | Add support for vector saturated add/subtract using Altivec |
---|---|---|---|
2 | are stored zero-extended in the 64-bit host registers. | 2 | instructions: |
3 | VADDSBS, VADDSHS, VADDSWS, VADDUBS, VADDUHS, VADDUWS, and | ||
4 | VSUBSBS, VSUBSHS, VSUBSWS, VSUBUBS, VSUBUHS, VSUBUWS. | ||
3 | 5 | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
5 | --- | 8 | --- |
6 | tcg/i386/tcg-target.inc.c | 13 ++++++++----- | 9 | tcg/ppc/tcg-target.h | 2 +- |
7 | 1 file changed, 8 insertions(+), 5 deletions(-) | 10 | tcg/ppc/tcg-target.inc.c | 36 ++++++++++++++++++++++++++++++++++++ |
11 | 2 files changed, 37 insertions(+), 1 deletion(-) | ||
8 | 12 | ||
9 | diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c | 13 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h |
10 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/tcg/i386/tcg-target.inc.c | 15 | --- a/tcg/ppc/tcg-target.h |
12 | +++ b/tcg/i386/tcg-target.inc.c | 16 | +++ b/tcg/ppc/tcg-target.h |
13 | @@ -XXX,XX +XXX,XX @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, | 17 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; |
14 | * Record the context of a call to the out of line helper code for the slow path | 18 | #define TCG_TARGET_HAS_shv_vec 0 |
15 | * for a load or store, so that we can later generate the correct helper code | 19 | #define TCG_TARGET_HAS_cmp_vec 1 |
16 | */ | 20 | #define TCG_TARGET_HAS_mul_vec 0 |
17 | -static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, | 21 | -#define TCG_TARGET_HAS_sat_vec 0 |
18 | +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, bool is_64, | 22 | +#define TCG_TARGET_HAS_sat_vec 1 |
19 | + TCGMemOpIdx oi, | 23 | #define TCG_TARGET_HAS_minmax_vec 1 |
20 | TCGReg datalo, TCGReg datahi, | 24 | #define TCG_TARGET_HAS_bitsel_vec 0 |
21 | TCGReg addrlo, TCGReg addrhi, | 25 | #define TCG_TARGET_HAS_cmpsel_vec 0 |
22 | tcg_insn_unit *raddr, | 26 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c |
23 | @@ -XXX,XX +XXX,XX @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, | 27 | index XXXXXXX..XXXXXXX 100644 |
24 | 28 | --- a/tcg/ppc/tcg-target.inc.c | |
25 | label->is_ld = is_ld; | 29 | +++ b/tcg/ppc/tcg-target.inc.c |
26 | label->oi = oi; | 30 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, |
27 | + label->type = is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32; | 31 | #define STVX XO31(231) |
28 | label->datalo_reg = datalo; | 32 | #define STVEWX XO31(199) |
29 | label->datahi_reg = datahi; | 33 | |
30 | label->addrlo_reg = addrlo; | 34 | +#define VADDSBS VX4(768) |
31 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) | 35 | +#define VADDUBS VX4(512) |
32 | TCGMemOp opc = get_memop(oi); | 36 | #define VADDUBM VX4(0) |
33 | TCGReg data_reg; | 37 | +#define VADDSHS VX4(832) |
34 | tcg_insn_unit **label_ptr = &l->label_ptr[0]; | 38 | +#define VADDUHS VX4(576) |
35 | + int rexw = (l->type == TCG_TYPE_I64 ? P_REXW : 0); | 39 | #define VADDUHM VX4(64) |
36 | 40 | +#define VADDSWS VX4(896) | |
37 | /* resolve label address */ | 41 | +#define VADDUWS VX4(640) |
38 | tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4); | 42 | #define VADDUWM VX4(128) |
39 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) | 43 | |
40 | data_reg = l->datalo_reg; | 44 | +#define VSUBSBS VX4(1792) |
41 | switch (opc & MO_SSIZE) { | 45 | +#define VSUBUBS VX4(1536) |
42 | case MO_SB: | 46 | #define VSUBUBM VX4(1024) |
43 | - tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW); | 47 | +#define VSUBSHS VX4(1856) |
44 | + tcg_out_ext8s(s, data_reg, TCG_REG_EAX, rexw); | 48 | +#define VSUBUHS VX4(1600) |
49 | #define VSUBUHM VX4(1088) | ||
50 | +#define VSUBSWS VX4(1920) | ||
51 | +#define VSUBUWS VX4(1664) | ||
52 | #define VSUBUWM VX4(1152) | ||
53 | |||
54 | #define VMAXSB VX4(258) | ||
55 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
56 | case INDEX_op_smin_vec: | ||
57 | case INDEX_op_umax_vec: | ||
58 | case INDEX_op_umin_vec: | ||
59 | + case INDEX_op_ssadd_vec: | ||
60 | + case INDEX_op_sssub_vec: | ||
61 | + case INDEX_op_usadd_vec: | ||
62 | + case INDEX_op_ussub_vec: | ||
63 | return vece <= MO_32; | ||
64 | case INDEX_op_cmp_vec: | ||
65 | return vece <= MO_32 ? -1 : 0; | ||
66 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
67 | eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, | ||
68 | gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, | ||
69 | gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }, | ||
70 | + ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, | ||
71 | + usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 }, | ||
72 | + sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 }, | ||
73 | + ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 }, | ||
74 | umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 }, | ||
75 | smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 }, | ||
76 | umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 }, | ||
77 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
78 | case INDEX_op_sub_vec: | ||
79 | insn = sub_op[vece]; | ||
45 | break; | 80 | break; |
46 | case MO_SW: | 81 | + case INDEX_op_ssadd_vec: |
47 | - tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW); | 82 | + insn = ssadd_op[vece]; |
48 | + tcg_out_ext16s(s, data_reg, TCG_REG_EAX, rexw); | 83 | + break; |
84 | + case INDEX_op_sssub_vec: | ||
85 | + insn = sssub_op[vece]; | ||
86 | + break; | ||
87 | + case INDEX_op_usadd_vec: | ||
88 | + insn = usadd_op[vece]; | ||
89 | + break; | ||
90 | + case INDEX_op_ussub_vec: | ||
91 | + insn = ussub_op[vece]; | ||
92 | + break; | ||
93 | case INDEX_op_smin_vec: | ||
94 | insn = smin_op[vece]; | ||
49 | break; | 95 | break; |
50 | #if TCG_TARGET_REG_BITS == 64 | 96 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) |
51 | case MO_SL: | 97 | case INDEX_op_andc_vec: |
52 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) | 98 | case INDEX_op_orc_vec: |
53 | tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, is64, opc); | 99 | case INDEX_op_cmp_vec: |
54 | 100 | + case INDEX_op_ssadd_vec: | |
55 | /* Record the current context of a load into ldst label */ | 101 | + case INDEX_op_sssub_vec: |
56 | - add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, | 102 | + case INDEX_op_usadd_vec: |
57 | + add_qemu_ldst_label(s, true, is64, oi, datalo, datahi, addrlo, addrhi, | 103 | + case INDEX_op_ussub_vec: |
58 | s->code_ptr, label_ptr); | 104 | case INDEX_op_smax_vec: |
59 | #else | 105 | case INDEX_op_smin_vec: |
60 | { | 106 | case INDEX_op_umax_vec: |
61 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) | ||
62 | tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); | ||
63 | |||
64 | /* Record the current context of a store into ldst label */ | ||
65 | - add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, | ||
66 | + add_qemu_ldst_label(s, false, is64, oi, datalo, datahi, addrlo, addrhi, | ||
67 | s->code_ptr, label_ptr); | ||
68 | #else | ||
69 | { | ||
70 | -- | 107 | -- |
71 | 2.17.2 | 108 | 2.17.1 |
72 | 109 | ||
73 | 110 | diff view generated by jsdifflib |
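The rexw selection above relies on an x86-64 property: writing the 32-bit form of a register clears bits 63..32. A small sketch of the slow-path sign extension for a TCG_TYPE_I32 result (assumed behavior for illustration, not code from the patch):

```c
#include <stdint.h>

static uint64_t slow_path_ext8s_i32(uint8_t loaded)
{
    int32_t v = (int8_t)loaded;   /* 32-bit movsx, i.e. rexw == 0 */
    return (uint32_t)v;           /* bits 63..32 are zero: invariant held */
}
```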
1 | From: "Emilio G. Cota" <cota@braap.org> | 1 | For Altivec, this is done via vector shift by vector, |
---|---|---|---|
2 | and loading the immediate into a register. | ||
2 | 3 | ||
3 | It's unused since 75e8b9b7aa0b95a761b9add7e2f09248b101a392. | 4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | --- | ||
7 | tcg/ppc/tcg-target.h | 2 +- | ||
8 | tcg/ppc/tcg-target.inc.c | 58 ++++++++++++++++++++++++++++++++++++++-- | ||
9 | 2 files changed, 57 insertions(+), 3 deletions(-) | ||
4 | 10 | ||
5 | Signed-off-by: Emilio G. Cota <cota@braap.org> | 11 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h |
6 | Message-Id: <20181209193749.12277-9-cota@braap.org> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | --- | ||
10 | tcg/tcg.h | 4 ++-- | ||
11 | tcg/optimize.c | 4 ++-- | ||
12 | tcg/tcg.c | 10 ++++------ | ||
13 | 3 files changed, 8 insertions(+), 10 deletions(-) | ||
14 | |||
15 | diff --git a/tcg/tcg.h b/tcg/tcg.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/tcg/tcg.h | 13 | --- a/tcg/ppc/tcg-target.h |
18 | +++ b/tcg/tcg.h | 14 | +++ b/tcg/ppc/tcg-target.h |
19 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args); | 15 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; |
20 | 16 | #define TCG_TARGET_HAS_abs_vec 0 | |
21 | TCGOp *tcg_emit_op(TCGOpcode opc); | 17 | #define TCG_TARGET_HAS_shi_vec 0 |
22 | void tcg_op_remove(TCGContext *s, TCGOp *op); | 18 | #define TCG_TARGET_HAS_shs_vec 0 |
23 | -TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg); | 19 | -#define TCG_TARGET_HAS_shv_vec 0 |
24 | -TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg); | 20 | +#define TCG_TARGET_HAS_shv_vec 1 |
25 | +TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc); | 21 | #define TCG_TARGET_HAS_cmp_vec 1 |
26 | +TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc); | 22 | #define TCG_TARGET_HAS_mul_vec 0 |
27 | 23 | #define TCG_TARGET_HAS_sat_vec 1 | |
28 | void tcg_optimize(TCGContext *s); | 24 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c |
29 | |||
30 | diff --git a/tcg/optimize.c b/tcg/optimize.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
32 | --- a/tcg/optimize.c | 26 | --- a/tcg/ppc/tcg-target.inc.c |
33 | +++ b/tcg/optimize.c | 27 | +++ b/tcg/ppc/tcg-target.inc.c |
34 | @@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s) | 28 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, |
35 | uint64_t a = ((uint64_t)ah << 32) | al; | 29 | #define VCMPGTUH VX4(582) |
36 | uint64_t b = ((uint64_t)bh << 32) | bl; | 30 | #define VCMPGTUW VX4(646) |
37 | TCGArg rl, rh; | 31 | |
38 | - TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2); | 32 | +#define VSLB VX4(260) |
39 | + TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32); | 33 | +#define VSLH VX4(324) |
40 | 34 | +#define VSLW VX4(388) | |
41 | if (opc == INDEX_op_add2_i32) { | 35 | +#define VSRB VX4(516) |
42 | a += b; | 36 | +#define VSRH VX4(580) |
43 | @@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s) | 37 | +#define VSRW VX4(644) |
44 | uint32_t b = arg_info(op->args[3])->val; | 38 | +#define VSRAB VX4(772) |
45 | uint64_t r = (uint64_t)a * b; | 39 | +#define VSRAH VX4(836) |
46 | TCGArg rl, rh; | 40 | +#define VSRAW VX4(900) |
47 | - TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2); | 41 | + |
48 | + TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32); | 42 | #define VAND VX4(1028) |
49 | 43 | #define VANDC VX4(1092) | |
50 | rl = op->args[0]; | 44 | #define VNOR VX4(1284) |
51 | rh = op->args[1]; | 45 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) |
52 | diff --git a/tcg/tcg.c b/tcg/tcg.c | 46 | case INDEX_op_sssub_vec: |
53 | index XXXXXXX..XXXXXXX 100644 | 47 | case INDEX_op_usadd_vec: |
54 | --- a/tcg/tcg.c | 48 | case INDEX_op_ussub_vec: |
55 | +++ b/tcg/tcg.c | 49 | + case INDEX_op_shlv_vec: |
56 | @@ -XXX,XX +XXX,XX @@ TCGOp *tcg_emit_op(TCGOpcode opc) | 50 | + case INDEX_op_shrv_vec: |
57 | return op; | 51 | + case INDEX_op_sarv_vec: |
52 | return vece <= MO_32; | ||
53 | case INDEX_op_cmp_vec: | ||
54 | + case INDEX_op_shli_vec: | ||
55 | + case INDEX_op_shri_vec: | ||
56 | + case INDEX_op_sari_vec: | ||
57 | return vece <= MO_32 ? -1 : 0; | ||
58 | default: | ||
59 | return 0; | ||
60 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
61 | umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 }, | ||
62 | smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 }, | ||
63 | umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 }, | ||
64 | - smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }; | ||
65 | + smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }, | ||
66 | + shlv_op[4] = { VSLB, VSLH, VSLW, 0 }, | ||
67 | + shrv_op[4] = { VSRB, VSRH, VSRW, 0 }, | ||
68 | + sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 }; | ||
69 | |||
70 | TCGType type = vecl + TCG_TYPE_V64; | ||
71 | TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | ||
72 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
73 | case INDEX_op_umax_vec: | ||
74 | insn = umax_op[vece]; | ||
75 | break; | ||
76 | + case INDEX_op_shlv_vec: | ||
77 | + insn = shlv_op[vece]; | ||
78 | + break; | ||
79 | + case INDEX_op_shrv_vec: | ||
80 | + insn = shrv_op[vece]; | ||
81 | + break; | ||
82 | + case INDEX_op_sarv_vec: | ||
83 | + insn = sarv_op[vece]; | ||
84 | + break; | ||
85 | case INDEX_op_and_vec: | ||
86 | insn = VAND; | ||
87 | break; | ||
88 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
89 | tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2)); | ||
58 | } | 90 | } |
59 | 91 | ||
60 | -TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, | 92 | +static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0, |
61 | - TCGOpcode opc, int nargs) | 93 | + TCGv_vec v1, TCGArg imm, TCGOpcode opci) |
62 | +TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc) | 94 | +{ |
95 | + TCGv_vec t1 = tcg_temp_new_vec(type); | ||
96 | + | ||
97 | + /* Splat w/bytes for xxspltib. */ | ||
98 | + tcg_gen_dupi_vec(MO_8, t1, imm & ((8 << vece) - 1)); | ||
99 | + vec_gen_3(opci, type, vece, tcgv_vec_arg(v0), | ||
100 | + tcgv_vec_arg(v1), tcgv_vec_arg(t1)); | ||
101 | + tcg_temp_free_vec(t1); | ||
102 | +} | ||
103 | + | ||
104 | static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | ||
105 | TCGv_vec v1, TCGv_vec v2, TCGCond cond) | ||
63 | { | 106 | { |
64 | TCGOp *new_op = tcg_op_alloc(opc); | 107 | @@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, |
65 | QTAILQ_INSERT_BEFORE(old_op, new_op, link); | ||
66 | return new_op; | ||
67 | } | ||
68 | |||
69 | -TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, | ||
70 | - TCGOpcode opc, int nargs) | ||
71 | +TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc) | ||
72 | { | 108 | { |
73 | TCGOp *new_op = tcg_op_alloc(opc); | 109 | va_list va; |
74 | QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link); | 110 | TCGv_vec v0, v1, v2; |
75 | @@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s) | 111 | + TCGArg a2; |
76 | TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32 | 112 | |
77 | ? INDEX_op_ld_i32 | 113 | va_start(va, a0); |
78 | : INDEX_op_ld_i64); | 114 | v0 = temp_tcgv_vec(arg_temp(a0)); |
79 | - TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3); | 115 | v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); |
80 | + TCGOp *lop = tcg_op_insert_before(s, op, lopc); | 116 | - v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); |
81 | 117 | + a2 = va_arg(va, TCGArg); | |
82 | lop->args[0] = temp_arg(dir_ts); | 118 | |
83 | lop->args[1] = temp_arg(arg_ts->mem_base); | 119 | switch (opc) { |
84 | @@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s) | 120 | + case INDEX_op_shli_vec: |
85 | TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32 | 121 | + expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec); |
86 | ? INDEX_op_st_i32 | 122 | + break; |
87 | : INDEX_op_st_i64); | 123 | + case INDEX_op_shri_vec: |
88 | - TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3); | 124 | + expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec); |
89 | + TCGOp *sop = tcg_op_insert_after(s, op, sopc); | 125 | + break; |
90 | 126 | + case INDEX_op_sari_vec: | |
91 | sop->args[0] = temp_arg(dir_ts); | 127 | + expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec); |
92 | sop->args[1] = temp_arg(arg_ts->mem_base); | 128 | + break; |
129 | case INDEX_op_cmp_vec: | ||
130 | + v2 = temp_tcgv_vec(arg_temp(a2)); | ||
131 | expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); | ||
132 | break; | ||
133 | default: | ||
134 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
135 | case INDEX_op_smin_vec: | ||
136 | case INDEX_op_umax_vec: | ||
137 | case INDEX_op_umin_vec: | ||
138 | + case INDEX_op_shlv_vec: | ||
139 | + case INDEX_op_shrv_vec: | ||
140 | + case INDEX_op_sarv_vec: | ||
141 | return &v_v_v; | ||
142 | case INDEX_op_not_vec: | ||
143 | case INDEX_op_dup_vec: | ||
93 | -- | 144 | -- |
94 | 2.17.2 | 145 | 2.17.1 |
95 | 146 | ||
96 | 147 | diff view generated by jsdifflib |
1 | Based on the only current user, Sparc: | 1 | For Altivec, this is always an expansion. |
---|---|---|---|
2 | |||
3 | New code uses 2 constants that take 2 insns each to load from the | 2 | |
4 | constant pool, plus 13 other insns. Old code used 6 constants that | 3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | took 1 or 2 insns to create, plus 21. The result is a new total of 17 vs an old total of 29. | 4 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> |
6 | 2 | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
8 | --- | 5 | --- |
9 | tcg/tcg-op.c | 43 ++++++++++++++++++------------------------- | 6 | tcg/ppc/tcg-target.h | 2 +- |
10 | 1 file changed, 18 insertions(+), 25 deletions(-) | 7 | tcg/ppc/tcg-target.opc.h | 8 +++ |
11 | 8 | tcg/ppc/tcg-target.inc.c | 113 ++++++++++++++++++++++++++++++++++++++- | |
12 | diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c | 9 | 3 files changed, 121 insertions(+), 2 deletions(-) |
10 | |||
11 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/tcg/tcg-op.c | 13 | --- a/tcg/ppc/tcg-target.h |
15 | +++ b/tcg/tcg-op.c | 14 | +++ b/tcg/ppc/tcg-target.h |
16 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) | 15 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; |
17 | } else { | 16 | #define TCG_TARGET_HAS_shs_vec 0 |
18 | TCGv_i64 t0 = tcg_temp_new_i64(); | 17 | #define TCG_TARGET_HAS_shv_vec 1 |
19 | TCGv_i64 t1 = tcg_temp_new_i64(); | 18 | #define TCG_TARGET_HAS_cmp_vec 1 |
20 | + TCGv_i64 t2 = tcg_temp_new_i64(); | 19 | -#define TCG_TARGET_HAS_mul_vec 0 |
21 | 20 | +#define TCG_TARGET_HAS_mul_vec 1 | |
22 | - tcg_gen_shli_i64(t0, arg, 56); | 21 | #define TCG_TARGET_HAS_sat_vec 1 |
23 | + /* arg = abcdefgh */ | 22 | #define TCG_TARGET_HAS_minmax_vec 1 |
24 | + tcg_gen_movi_i64(t2, 0x00ff00ff00ff00ffull); | 23 | #define TCG_TARGET_HAS_bitsel_vec 0 |
25 | + tcg_gen_shri_i64(t0, arg, 8); /* t0 = .abcdefg */ | 24 | diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h |
26 | + tcg_gen_and_i64(t1, arg, t2); /* t1 = .b.d.f.h */ | 25 | index XXXXXXX..XXXXXXX 100644 |
27 | + tcg_gen_and_i64(t0, t0, t2); /* t0 = .a.c.e.g */ | 26 | --- a/tcg/ppc/tcg-target.opc.h |
28 | + tcg_gen_shli_i64(t1, t1, 8); /* t1 = b.d.f.h. */ | 27 | +++ b/tcg/ppc/tcg-target.opc.h |
29 | + tcg_gen_or_i64(ret, t0, t1); /* ret = badcfehg */ | 28 | @@ -XXX,XX +XXX,XX @@ |
30 | 29 | * emitted by tcg_expand_vec_op. For those familiar with GCC internals, | |
31 | - tcg_gen_andi_i64(t1, arg, 0x0000ff00); | 30 | * consider these to be UNSPEC with names. |
32 | - tcg_gen_shli_i64(t1, t1, 40); | 31 | */ |
33 | - tcg_gen_or_i64(t0, t0, t1); | 32 | + |
34 | + tcg_gen_movi_i64(t2, 0x0000ffff0000ffffull); | 33 | +DEF(ppc_mrgh_vec, 1, 2, 0, IMPLVEC) |
35 | + tcg_gen_shri_i64(t0, ret, 16); /* t0 = ..badcfe */ | 34 | +DEF(ppc_mrgl_vec, 1, 2, 0, IMPLVEC) |
36 | + tcg_gen_and_i64(t1, ret, t2); /* t1 = ..dc..hg */ | 35 | +DEF(ppc_msum_vec, 1, 3, 0, IMPLVEC) |
37 | + tcg_gen_and_i64(t0, t0, t2); /* t0 = ..ba..fe */ | 36 | +DEF(ppc_muleu_vec, 1, 2, 0, IMPLVEC) |
38 | + tcg_gen_shli_i64(t1, t1, 16); /* t1 = dc..hg.. */ | 37 | +DEF(ppc_mulou_vec, 1, 2, 0, IMPLVEC) |
39 | + tcg_gen_or_i64(ret, t0, t1); /* ret = dcbahgfe */ | 38 | +DEF(ppc_pkum_vec, 1, 2, 0, IMPLVEC) |
40 | 39 | +DEF(ppc_rotl_vec, 1, 2, 0, IMPLVEC) | |
41 | - tcg_gen_andi_i64(t1, arg, 0x00ff0000); | 40 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c |
42 | - tcg_gen_shli_i64(t1, t1, 24); | 41 | index XXXXXXX..XXXXXXX 100644 |
43 | - tcg_gen_or_i64(t0, t0, t1); | 42 | --- a/tcg/ppc/tcg-target.inc.c |
44 | + tcg_gen_shri_i64(t0, ret, 32); /* t0 = ....dcba */ | 43 | +++ b/tcg/ppc/tcg-target.inc.c |
45 | + tcg_gen_shli_i64(t1, ret, 32); /* t1 = hgfe.... */ | 44 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, |
46 | + tcg_gen_or_i64(ret, t0, t1); /* ret = hgfedcba */ | 45 | #define VSRAB VX4(772) |
47 | 46 | #define VSRAH VX4(836) | |
48 | - tcg_gen_andi_i64(t1, arg, 0xff000000); | 47 | #define VSRAW VX4(900) |
49 | - tcg_gen_shli_i64(t1, t1, 8); | 48 | +#define VRLB VX4(4) |
50 | - tcg_gen_or_i64(t0, t0, t1); | 49 | +#define VRLH VX4(68) |
51 | - | 50 | +#define VRLW VX4(132) |
52 | - tcg_gen_shri_i64(t1, arg, 8); | 51 | + |
53 | - tcg_gen_andi_i64(t1, t1, 0xff000000); | 52 | +#define VMULEUB VX4(520) |
54 | - tcg_gen_or_i64(t0, t0, t1); | 53 | +#define VMULEUH VX4(584) |
55 | - | 54 | +#define VMULOUB VX4(8) |
56 | - tcg_gen_shri_i64(t1, arg, 24); | 55 | +#define VMULOUH VX4(72) |
57 | - tcg_gen_andi_i64(t1, t1, 0x00ff0000); | 56 | +#define VMSUMUHM VX4(38) |
58 | - tcg_gen_or_i64(t0, t0, t1); | 57 | + |
59 | - | 58 | +#define VMRGHB VX4(12) |
60 | - tcg_gen_shri_i64(t1, arg, 40); | 59 | +#define VMRGHH VX4(76) |
61 | - tcg_gen_andi_i64(t1, t1, 0x0000ff00); | 60 | +#define VMRGHW VX4(140) |
62 | - tcg_gen_or_i64(t0, t0, t1); | 61 | +#define VMRGLB VX4(268) |
63 | - | 62 | +#define VMRGLH VX4(332) |
64 | - tcg_gen_shri_i64(t1, arg, 56); | 63 | +#define VMRGLW VX4(396) |
65 | - tcg_gen_or_i64(ret, t0, t1); | 64 | + |
66 | tcg_temp_free_i64(t0); | 65 | +#define VPKUHUM VX4(14) |
67 | tcg_temp_free_i64(t1); | 66 | +#define VPKUWUM VX4(78) |
68 | + tcg_temp_free_i64(t2); | 67 | |
68 | #define VAND VX4(1028) | ||
69 | #define VANDC VX4(1092) | ||
70 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
71 | case INDEX_op_sarv_vec: | ||
72 | return vece <= MO_32; | ||
73 | case INDEX_op_cmp_vec: | ||
74 | + case INDEX_op_mul_vec: | ||
75 | case INDEX_op_shli_vec: | ||
76 | case INDEX_op_shri_vec: | ||
77 | case INDEX_op_sari_vec: | ||
78 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
79 | smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }, | ||
80 | shlv_op[4] = { VSLB, VSLH, VSLW, 0 }, | ||
81 | shrv_op[4] = { VSRB, VSRH, VSRW, 0 }, | ||
82 | - sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 }; | ||
83 | + sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 }, | ||
84 | + mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 }, | ||
85 | + mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 }, | ||
86 | + muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 }, | ||
87 | + mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 }, | ||
88 | + pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 }, | ||
89 | + rotl_op[4] = { VRLB, VRLH, VRLW, 0 }; | ||
90 | |||
91 | TCGType type = vecl + TCG_TYPE_V64; | ||
92 | TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | ||
93 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
94 | } | ||
95 | break; | ||
96 | |||
97 | + case INDEX_op_ppc_mrgh_vec: | ||
98 | + insn = mrgh_op[vece]; | ||
99 | + break; | ||
100 | + case INDEX_op_ppc_mrgl_vec: | ||
101 | + insn = mrgl_op[vece]; | ||
102 | + break; | ||
103 | + case INDEX_op_ppc_muleu_vec: | ||
104 | + insn = muleu_op[vece]; | ||
105 | + break; | ||
106 | + case INDEX_op_ppc_mulou_vec: | ||
107 | + insn = mulou_op[vece]; | ||
108 | + break; | ||
109 | + case INDEX_op_ppc_pkum_vec: | ||
110 | + insn = pkum_op[vece]; | ||
111 | + break; | ||
112 | + case INDEX_op_ppc_rotl_vec: | ||
113 | + insn = rotl_op[vece]; | ||
114 | + break; | ||
115 | + case INDEX_op_ppc_msum_vec: | ||
116 | + tcg_debug_assert(vece == MO_16); | ||
117 | + tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3])); | ||
118 | + return; | ||
119 | + | ||
120 | case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ | ||
121 | case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */ | ||
122 | case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ | ||
123 | @@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | ||
69 | } | 124 | } |
70 | } | 125 | } |
71 | 126 | ||
127 | +static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0, | ||
128 | + TCGv_vec v1, TCGv_vec v2) | ||
129 | +{ | ||
130 | + TCGv_vec t1 = tcg_temp_new_vec(type); | ||
131 | + TCGv_vec t2 = tcg_temp_new_vec(type); | ||
132 | + TCGv_vec t3, t4; | ||
133 | + | ||
134 | + switch (vece) { | ||
135 | + case MO_8: | ||
136 | + case MO_16: | ||
137 | + vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1), | ||
138 | + tcgv_vec_arg(v1), tcgv_vec_arg(v2)); | ||
139 | + vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2), | ||
140 | + tcgv_vec_arg(v1), tcgv_vec_arg(v2)); | ||
141 | + vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0), | ||
142 | + tcgv_vec_arg(t1), tcgv_vec_arg(t2)); | ||
143 | + vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1), | ||
144 | + tcgv_vec_arg(t1), tcgv_vec_arg(t2)); | ||
145 | + vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0), | ||
146 | + tcgv_vec_arg(v0), tcgv_vec_arg(t1)); | ||
147 | + break; | ||
148 | + | ||
149 | + case MO_32: | ||
150 | + t3 = tcg_temp_new_vec(type); | ||
151 | + t4 = tcg_temp_new_vec(type); | ||
152 | + tcg_gen_dupi_vec(MO_8, t4, -16); | ||
153 | + vec_gen_3(INDEX_op_ppc_rotl_vec, type, MO_32, tcgv_vec_arg(t1), | ||
154 | + tcgv_vec_arg(v2), tcgv_vec_arg(t4)); | ||
155 | + vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2), | ||
156 | + tcgv_vec_arg(v1), tcgv_vec_arg(v2)); | ||
157 | + tcg_gen_dupi_vec(MO_8, t3, 0); | ||
158 | + vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t3), | ||
159 | + tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(t3)); | ||
160 | + vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t3), | ||
161 | + tcgv_vec_arg(t3), tcgv_vec_arg(t4)); | ||
162 | + tcg_gen_add_vec(MO_32, v0, t2, t3); | ||
163 | + tcg_temp_free_vec(t3); | ||
164 | + tcg_temp_free_vec(t4); | ||
165 | + break; | ||
166 | + | ||
167 | + default: | ||
168 | + g_assert_not_reached(); | ||
169 | + } | ||
170 | + tcg_temp_free_vec(t1); | ||
171 | + tcg_temp_free_vec(t2); | ||
172 | +} | ||
173 | + | ||
174 | void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, | ||
175 | TCGArg a0, ...) | ||
176 | { | ||
177 | @@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, | ||
178 | v2 = temp_tcgv_vec(arg_temp(a2)); | ||
179 | expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); | ||
180 | break; | ||
181 | + case INDEX_op_mul_vec: | ||
182 | + v2 = temp_tcgv_vec(arg_temp(a2)); | ||
183 | + expand_vec_mul(type, vece, v0, v1, v2); | ||
184 | + break; | ||
185 | default: | ||
186 | g_assert_not_reached(); | ||
187 | } | ||
188 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
189 | static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } }; | ||
190 | static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } }; | ||
191 | static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } }; | ||
192 | + static const TCGTargetOpDef v_v_v_v | ||
193 | + = { .args_ct_str = { "v", "v", "v", "v" } }; | ||
194 | |||
195 | switch (op) { | ||
196 | case INDEX_op_goto_ptr: | ||
197 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
198 | |||
199 | case INDEX_op_add_vec: | ||
200 | case INDEX_op_sub_vec: | ||
201 | + case INDEX_op_mul_vec: | ||
202 | case INDEX_op_and_vec: | ||
203 | case INDEX_op_or_vec: | ||
204 | case INDEX_op_xor_vec: | ||
205 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
206 | case INDEX_op_shlv_vec: | ||
207 | case INDEX_op_shrv_vec: | ||
208 | case INDEX_op_sarv_vec: | ||
209 | + case INDEX_op_ppc_mrgh_vec: | ||
210 | + case INDEX_op_ppc_mrgl_vec: | ||
211 | + case INDEX_op_ppc_muleu_vec: | ||
212 | + case INDEX_op_ppc_mulou_vec: | ||
213 | + case INDEX_op_ppc_pkum_vec: | ||
214 | + case INDEX_op_ppc_rotl_vec: | ||
215 | return &v_v_v; | ||
216 | case INDEX_op_not_vec: | ||
217 | case INDEX_op_dup_vec: | ||
218 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
219 | case INDEX_op_st_vec: | ||
220 | case INDEX_op_dupm_vec: | ||
221 | return &v_r; | ||
222 | + case INDEX_op_ppc_msum_vec: | ||
223 | + return &v_v_v_v; | ||
224 | |||
225 | default: | ||
226 | return NULL; | ||
72 | -- | 227 | -- |
73 | 2.17.2 | 228 | 2.17.1 |
74 | 229 | ||
75 | 230 | diff view generated by jsdifflib |
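The MO_32 case of expand_vec_mul above builds a 32x32->32 multiply from 16-bit pieces: VMULOUH produces the low-half products, VMSUMUHM applied to a half-rotated operand sums the two cross products, and a shift plus add combines them. The identity being used, as a scalar C sketch (names illustrative):

    #include <stdint.h>

    static uint32_t mul32_model(uint32_t a, uint32_t b)
    {
        uint32_t a_lo = a & 0xffff, a_hi = a >> 16;
        uint32_t b_lo = b & 0xffff, b_hi = b >> 16;
        uint32_t lo  = a_lo * b_lo;                /* VMULOUH, per 32-bit lane */
        uint32_t mid = a_hi * b_lo + a_lo * b_hi;  /* VMSUMUHM with rotl(b, 16) */
        return lo + (mid << 16);                   /* shlv by 16, then add */
    }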
1 | There is no longer a need for preserving branch offset operands, | 1 | This is only used for 32-bit hosts. |
---|---|---|---|
2 | as we no longer re-translate. | ||
3 | 2 | ||
4 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | --- | 5 | --- |
7 | tcg/ppc/tcg-target.inc.c | 25 +++++++------------------ | 6 | tcg/ppc/tcg-target.inc.c | 9 +++++++++ |
8 | 1 file changed, 7 insertions(+), 18 deletions(-) | 7 | 1 file changed, 9 insertions(+) |
9 | 8 | ||
10 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 9 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c |
11 | index XXXXXXX..XXXXXXX 100644 | 10 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/tcg/ppc/tcg-target.inc.c | 11 | --- a/tcg/ppc/tcg-target.inc.c |
13 | +++ b/tcg/ppc/tcg-target.inc.c | 12 | +++ b/tcg/ppc/tcg-target.inc.c |
14 | @@ -XXX,XX +XXX,XX @@ static void reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target) | 13 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, |
15 | *pc = (*pc & ~0xfffc) | reloc_pc14_val(pc, target); | ||
16 | } | ||
17 | |||
18 | -static inline void tcg_out_b_noaddr(TCGContext *s, int insn) | ||
19 | -{ | ||
20 | - unsigned retrans = *s->code_ptr & 0x3fffffc; | ||
21 | - tcg_out32(s, insn | retrans); | ||
22 | -} | ||
23 | - | ||
24 | -static inline void tcg_out_bc_noaddr(TCGContext *s, int insn) | ||
25 | -{ | ||
26 | - unsigned retrans = *s->code_ptr & 0xfffc; | ||
27 | - tcg_out32(s, insn | retrans); | ||
28 | -} | ||
29 | - | ||
30 | /* parse target specific constraints */ | ||
31 | static const char *target_parse_constraint(TCGArgConstraint *ct, | ||
32 | const char *ct_str, TCGType type) | ||
33 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, | ||
34 | static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l) | ||
35 | { | ||
36 | if (l->has_value) { | ||
37 | - tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value_ptr)); | ||
38 | + bc |= reloc_pc14_val(s->code_ptr, l->u.value_ptr); | ||
39 | } else { | ||
40 | tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0); | ||
41 | - tcg_out_bc_noaddr(s, bc); | ||
42 | } | ||
43 | + tcg_out32(s, bc); | ||
44 | } | ||
45 | |||
46 | static void tcg_out_brcond(TCGContext *s, TCGCond cond, | ||
47 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) | ||
48 | |||
49 | /* Load a pointer into the current opcode w/conditional branch-link. */ | ||
50 | label_ptr = s->code_ptr; | ||
51 | - tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); | ||
52 | + tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); | ||
53 | |||
54 | rbase = TCG_REG_R3; | ||
55 | #else /* !CONFIG_SOFTMMU */ | ||
56 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) | ||
57 | |||
58 | /* Load a pointer into the current opcode w/conditional branch-link. */ | ||
59 | label_ptr = s->code_ptr; | ||
60 | - tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); | ||
61 | + tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK); | ||
62 | |||
63 | rbase = TCG_REG_R3; | ||
64 | #else /* !CONFIG_SOFTMMU */ | ||
65 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, | ||
66 | case INDEX_op_br: | ||
67 | { | ||
68 | TCGLabel *l = arg_label(args[0]); | ||
69 | + uint32_t insn = B; | ||
70 | |||
71 | if (l->has_value) { | ||
72 | - tcg_out_b(s, 0, l->u.value_ptr); | ||
73 | + insn |= reloc_pc24_val(s->code_ptr, l->u.value_ptr); | ||
74 | } else { | ||
75 | tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0); | ||
76 | - tcg_out_b_noaddr(s, B); | ||
77 | } | ||
78 | + tcg_out32(s, insn); | ||
79 | } | 14 | } |
80 | break; | 15 | break; |
81 | case INDEX_op_ld8u_i32: | 16 | |
17 | + case INDEX_op_dup2_vec: | ||
18 | + assert(TCG_TARGET_REG_BITS == 32); | ||
19 | + /* With inputs a1 = xLxx, a2 = xHxx */ | ||
20 | + tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1)); /* a0 = xxHL */ | ||
21 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8); /* tmp = HLxx */ | ||
22 | + tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8); /* a0 = HLHL */ | ||
23 | + return; | ||
24 | + | ||
25 | case INDEX_op_ppc_mrgh_vec: | ||
26 | insn = mrgh_op[vece]; | ||
27 | break; | ||
28 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
29 | case INDEX_op_ppc_mulou_vec: | ||
30 | case INDEX_op_ppc_pkum_vec: | ||
31 | case INDEX_op_ppc_rotl_vec: | ||
32 | + case INDEX_op_dup2_vec: | ||
33 | return &v_v_v; | ||
34 | case INDEX_op_not_vec: | ||
35 | case INDEX_op_dup_vec: | ||
82 | -- | 36 | -- |
83 | 2.17.2 | 37 | 2.17.1 |
84 | 38 | ||
85 | 39 | diff view generated by jsdifflib |
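To unpack the lane comments in the dup2_vec sequence above: with 32-bit word lanes written left to right, a1 carries the low word (xLxx) and a2 the high word (xHxx); VMRGHW interleaves the high-half words and two VSLDOIs replicate the 64-bit pair. A word-granularity C model (sketch only):

    #include <stdint.h>

    /* vsldoi d,a,b,shb: take 16 bytes of a:b starting at byte shb. */
    static void vsldoi_model(uint32_t d[4], const uint32_t a[4],
                             const uint32_t b[4], int shb)
    {
        uint32_t cat[8];
        int i, w = shb / 4;            /* word granularity is enough here */

        for (i = 0; i < 4; i++) { cat[i] = a[i]; cat[i + 4] = b[i]; }
        for (i = 0; i < 4; i++) { d[i] = cat[i + w]; }
    }

    static void dup2_model(uint32_t a0[4],
                           const uint32_t a1[4], const uint32_t a2[4])
    {
        uint32_t tmp[4];

        /* VMRGHW a0,a2,a1: interleave high-half words, a0 = xxHL */
        a0[0] = a2[0]; a0[1] = a1[0]; a0[2] = a2[1]; a0[3] = a1[1];
        vsldoi_model(tmp, a0, a0, 8);  /* tmp = HLxx */
        vsldoi_model(a0, a0, tmp, 8);  /* a0  = HLHL */
    }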
Deleted patch | |||
---|---|---|---|
1 | There is no longer a need for preserving branch offset operands, | ||
2 | as we no longer re-translate. | ||
3 | 1 | ||
4 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | tcg/s390/tcg-target.inc.c | 10 ++-------- | ||
8 | 1 file changed, 2 insertions(+), 8 deletions(-) | ||
9 | |||
10 | diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/tcg/s390/tcg-target.inc.c | ||
13 | +++ b/tcg/s390/tcg-target.inc.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static void tgen_branch(TCGContext *s, int cc, TCGLabel *l) | ||
15 | static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc, | ||
16 | TCGReg r1, TCGReg r2, TCGLabel *l) | ||
17 | { | ||
18 | - intptr_t off; | ||
19 | + intptr_t off = 0; | ||
20 | |||
21 | if (l->has_value) { | ||
22 | off = l->u.value_ptr - s->code_ptr; | ||
23 | } else { | ||
24 | - /* We need to keep the offset unchanged for retranslation. */ | ||
25 | - off = s->code_ptr[1]; | ||
26 | tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2); | ||
27 | } | ||
28 | |||
29 | @@ -XXX,XX +XXX,XX @@ static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc, | ||
30 | static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc, | ||
31 | TCGReg r1, int i2, TCGLabel *l) | ||
32 | { | ||
33 | - tcg_target_long off; | ||
34 | + tcg_target_long off = 0; | ||
35 | |||
36 | if (l->has_value) { | ||
37 | off = l->u.value_ptr - s->code_ptr; | ||
38 | } else { | ||
39 | - /* We need to keep the offset unchanged for retranslation. */ | ||
40 | - off = s->code_ptr[1]; | ||
41 | tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2); | ||
42 | } | ||
43 | |||
44 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, | ||
45 | |||
46 | base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1); | ||
47 | |||
48 | - /* We need to keep the offset unchanged for retranslation. */ | ||
49 | tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); | ||
50 | label_ptr = s->code_ptr; | ||
51 | s->code_ptr += 1; | ||
52 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, | ||
53 | |||
54 | base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0); | ||
55 | |||
56 | - /* We need to keep the offset unchanged for retranslation. */ | ||
57 | tcg_out16(s, RI_BRC | (S390_CC_NE << 4)); | ||
58 | label_ptr = s->code_ptr; | ||
59 | s->code_ptr += 1; | ||
60 | -- | ||
61 | 2.17.2 | ||
62 | |||
63 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | There is no longer a need for preserving branch offset operands, | ||
2 | as we no longer re-translate. | ||
3 | 1 | ||
4 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | tcg/sparc/tcg-target.inc.c | 8 ++------ | ||
8 | 1 file changed, 2 insertions(+), 6 deletions(-) | ||
9 | |||
10 | diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/tcg/sparc/tcg-target.inc.c | ||
13 | +++ b/tcg/sparc/tcg-target.inc.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19) | ||
15 | |||
16 | static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l) | ||
17 | { | ||
18 | - int off19; | ||
19 | + int off19 = 0; | ||
20 | |||
21 | if (l->has_value) { | ||
22 | off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr)); | ||
23 | } else { | ||
24 | - /* Make sure to preserve destinations during retranslation. */ | ||
25 | - off19 = *s->code_ptr & INSN_OFF19(-1); | ||
26 | tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, l, 0); | ||
27 | } | ||
28 | tcg_out_bpcc0(s, scond, flags, off19); | ||
29 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1, | ||
30 | { | ||
31 | /* For 64-bit signed comparisons vs zero, we can avoid the compare. */ | ||
32 | if (arg2 == 0 && !is_unsigned_cond(cond)) { | ||
33 | - int off16; | ||
34 | + int off16 = 0; | ||
35 | |||
36 | if (l->has_value) { | ||
37 | off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr)); | ||
38 | } else { | ||
39 | - /* Make sure to preserve destinations during retranslation. */ | ||
40 | - off16 = *s->code_ptr & INSN_OFF16(-1); | ||
41 | tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, l, 0); | ||
42 | } | ||
43 | tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1) | ||
44 | -- | ||
45 | 2.17.2 | ||
46 | |||
47 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | There is no longer a need for preserving branch offset operands, | ||
2 | as we no longer re-translate. | ||
3 | 1 | ||
4 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | tcg/mips/tcg-target.inc.c | 7 +------ | ||
8 | 1 file changed, 1 insertion(+), 6 deletions(-) | ||
9 | |||
10 | diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/tcg/mips/tcg-target.inc.c | ||
13 | +++ b/tcg/mips/tcg-target.inc.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static inline void tcg_out_opc_bf64(TCGContext *s, MIPSInsn opc, MIPSInsn opm, | ||
15 | static inline void tcg_out_opc_br(TCGContext *s, MIPSInsn opc, | ||
16 | TCGReg rt, TCGReg rs) | ||
17 | { | ||
18 | - /* We pay attention here to not modify the branch target by reading | ||
19 | - the existing value and using it again. This ensure that caches and | ||
20 | - memory are kept coherent during retranslation. */ | ||
21 | - uint16_t offset = (uint16_t)*s->code_ptr; | ||
22 | - | ||
23 | - tcg_out_opc_imm(s, opc, rt, rs, offset); | ||
24 | + tcg_out_opc_imm(s, opc, rt, rs, 0); | ||
25 | } | ||
26 | |||
27 | /* | ||
28 | -- | ||
29 | 2.17.2 | ||
30 | |||
31 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
3 | --- | ||
4 | tcg/i386/tcg-target.inc.c | 4 ++-- | ||
5 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
6 | 1 | ||
7 | diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c | ||
8 | index XXXXXXX..XXXXXXX 100644 | ||
9 | --- a/tcg/i386/tcg-target.inc.c | ||
10 | +++ b/tcg/i386/tcg-target.inc.c | ||
11 | @@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
12 | case R_386_PC32: | ||
13 | value -= (uintptr_t)code_ptr; | ||
14 | if (value != (int32_t)value) { | ||
15 | - tcg_abort(); | ||
16 | + return false; | ||
17 | } | ||
18 | /* FALLTHRU */ | ||
19 | case R_386_32: | ||
20 | @@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
21 | case R_386_PC8: | ||
22 | value -= (uintptr_t)code_ptr; | ||
23 | if (value != (int8_t)value) { | ||
24 | - tcg_abort(); | ||
25 | + return false; | ||
26 | } | ||
27 | tcg_patch8(code_ptr, value); | ||
28 | break; | ||
29 | -- | ||
30 | 2.17.2 | ||
31 | |||
32 | diff view generated by jsdifflib |
1 | This does require two extra checks within the slow paths | 1 | Now that we have implemented the required tcg operations, |
---|---|---|---|
2 | to replace the assert that we're moving. | 2 | we can enable detection of host vector support. |
3 | 3 | ||
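The reworked relocations in the hunk below share one pattern: sign-extract the branch field's width from the computed displacement and compare for equality; they match exactly when the displacement fits. A standalone C model (a sketch; sextract64_model stands in for QEMU's bitops helper):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    static int64_t sextract64_model(uint64_t value, int start, int length)
    {
        /* Shift the field to the top, then arithmetic-shift back down
           (assumes the usual implementation-defined signed >>). */
        return (int64_t)(value << (64 - length - start)) >> (64 - length);
    }

    static bool offset_fits(ptrdiff_t offset, int bits)  /* e.g. bits = 26 */
    {
        return offset == sextract64_model((uint64_t)offset, 0, bits);
    }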
4 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 4 | Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> (PPC32) |
5 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | --- | 7 | --- |
7 | tcg/aarch64/tcg-target.inc.c | 37 ++++++++++++++++++++---------------- | 8 | tcg/ppc/tcg-target.inc.c | 4 ++++ |
8 | 1 file changed, 21 insertions(+), 16 deletions(-) | 9 | 1 file changed, 4 insertions(+) |
9 | 10 | ||
10 | diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c | 11 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c |
11 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/tcg/aarch64/tcg-target.inc.c | 13 | --- a/tcg/ppc/tcg-target.inc.c |
13 | +++ b/tcg/aarch64/tcg-target.inc.c | 14 | +++ b/tcg/ppc/tcg-target.inc.c |
14 | @@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_oarg_regs[1] = { | 15 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) |
15 | #define TCG_REG_GUEST_BASE TCG_REG_X28 | 16 | have_isel = have_isa_2_06; |
16 | #endif | 17 | #endif |
17 | 18 | ||
18 | -static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target) | 19 | + if (hwcap & PPC_FEATURE_HAS_ALTIVEC) { |
19 | +static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target) | 20 | + have_altivec = true; |
20 | { | ||
21 | ptrdiff_t offset = target - code_ptr; | ||
22 | - tcg_debug_assert(offset == sextract64(offset, 0, 26)); | ||
23 | - /* read instruction, mask away previous PC_REL26 parameter contents, | ||
24 | - set the proper offset, then write back the instruction. */ | ||
25 | - *code_ptr = deposit32(*code_ptr, 0, 26, offset); | ||
26 | + if (offset == sextract64(offset, 0, 26)) { | ||
27 | + /* read instruction, mask away previous PC_REL26 parameter contents, | ||
28 | + set the proper offset, then write back the instruction. */ | ||
29 | + *code_ptr = deposit32(*code_ptr, 0, 26, offset); | ||
30 | + return true; | ||
31 | + } | 21 | + } |
32 | + return false; | 22 | + |
33 | } | 23 | tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; |
34 | 24 | tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff; | |
35 | -static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target) | 25 | if (have_altivec) { |
36 | +static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target) | ||
37 | { | ||
38 | ptrdiff_t offset = target - code_ptr; | ||
39 | - tcg_debug_assert(offset == sextract64(offset, 0, 19)); | ||
40 | - *code_ptr = deposit32(*code_ptr, 5, 19, offset); | ||
41 | + if (offset == sextract64(offset, 0, 19)) { | ||
42 | + *code_ptr = deposit32(*code_ptr, 5, 19, offset); | ||
43 | + return true; | ||
44 | + } | ||
45 | + return false; | ||
46 | } | ||
47 | |||
48 | static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
49 | @@ -XXX,XX +XXX,XX @@ static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
50 | switch (type) { | ||
51 | case R_AARCH64_JUMP26: | ||
52 | case R_AARCH64_CALL26: | ||
53 | - reloc_pc26(code_ptr, (tcg_insn_unit *)value); | ||
54 | - break; | ||
55 | + return reloc_pc26(code_ptr, (tcg_insn_unit *)value); | ||
56 | case R_AARCH64_CONDBR19: | ||
57 | - reloc_pc19(code_ptr, (tcg_insn_unit *)value); | ||
58 | - break; | ||
59 | + return reloc_pc19(code_ptr, (tcg_insn_unit *)value); | ||
60 | default: | ||
61 | - tcg_abort(); | ||
62 | + g_assert_not_reached(); | ||
63 | } | ||
64 | - return true; | ||
65 | } | ||
66 | |||
67 | #define TCG_CT_CONST_AIMM 0x100 | ||
68 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
69 | TCGMemOp opc = get_memop(oi); | ||
70 | TCGMemOp size = opc & MO_SIZE; | ||
71 | |||
72 | - reloc_pc19(lb->label_ptr[0], s->code_ptr); | ||
73 | + bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr); | ||
74 | + tcg_debug_assert(ok); | ||
75 | |||
76 | tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); | ||
77 | tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); | ||
78 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
79 | TCGMemOp opc = get_memop(oi); | ||
80 | TCGMemOp size = opc & MO_SIZE; | ||
81 | |||
82 | - reloc_pc19(lb->label_ptr[0], s->code_ptr); | ||
83 | + bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr); | ||
84 | + tcg_debug_assert(ok); | ||
85 | |||
86 | tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); | ||
87 | tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg); | ||
88 | -- | 26 | -- |
89 | 2.17.2 | 27 | 2.17.1 |
90 | 28 | ||
91 | 29 | diff view generated by jsdifflib |
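For context on where the hwcap bit tested above comes from: on Linux the kernel publishes it through the ELF auxiliary vector. A self-contained sketch using getauxval(3) (the headers are powerpc/Linux-specific; how QEMU itself obtains hwcap may differ):

    #include <stdbool.h>
    #include <sys/auxv.h>
    #include <asm/cputable.h>          /* PPC_FEATURE_HAS_ALTIVEC */

    static bool host_has_altivec(void)
    {
        unsigned long hwcap = getauxval(AT_HWCAP);
        return (hwcap & PPC_FEATURE_HAS_ALTIVEC) != 0;
    }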
1 | We now have an invariant that all TCG_TYPE_I32 values are | 1 | The VSX instruction set includes double-word loads and |
---|---|---|---|
2 | zero-extended, which means that we do not need to extend | 2 | stores, double-word load and splat, double-word permute, and bit |
3 | them again during qemu_ld/st, either explicitly via a separate | 3 | select, all of which require multiple operations in the Altivec |
4 | tcg_out_ext32u or implicitly via P_ADDR32. | 4 | instruction set. |
5 | 5 | ||
6 | Reviewed-by: Emilio G. Cota <cota@braap.org> | 6 | Because the VSX registers map %vsr32 to %vr0, and we have no current |
7 | intention or need to use vector registers outside %vr0-%vr19, force | ||
8 | on the {ax,bx,cx,tx} bits within the added VSX insns so that we don't | ||
9 | have to otherwise modify the VR[TABC] macros. | ||
10 | |||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
12 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
8 | --- | 13 | --- |
9 | tcg/i386/tcg-target.inc.c | 103 +++++++++++++++----------------------- | 14 | tcg/ppc/tcg-target.h | 5 ++-- |
10 | 1 file changed, 40 insertions(+), 63 deletions(-) | 15 | tcg/ppc/tcg-target.inc.c | 52 ++++++++++++++++++++++++++++++++++++---- |
16 | 2 files changed, 51 insertions(+), 6 deletions(-) | ||
11 | 17 | ||
12 | diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c | 18 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h |
13 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/tcg/i386/tcg-target.inc.c | 20 | --- a/tcg/ppc/tcg-target.h |
15 | +++ b/tcg/i386/tcg-target.inc.c | 21 | +++ b/tcg/ppc/tcg-target.h |
16 | @@ -XXX,XX +XXX,XX @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, | 22 | @@ -XXX,XX +XXX,XX @@ typedef enum { |
17 | #define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */ | 23 | |
18 | #define P_DATA16 0x400 /* 0x66 opcode prefix */ | 24 | extern TCGPowerISA have_isa; |
19 | #if TCG_TARGET_REG_BITS == 64 | 25 | extern bool have_altivec; |
20 | -# define P_ADDR32 0x800 /* 0x67 opcode prefix */ | 26 | +extern bool have_vsx; |
21 | # define P_REXW 0x1000 /* Set REX.W = 1 */ | 27 | |
22 | # define P_REXB_R 0x2000 /* REG field as byte register */ | 28 | #define have_isa_2_06 (have_isa >= tcg_isa_2_06) |
23 | # define P_REXB_RM 0x4000 /* R/M field as byte register */ | 29 | #define have_isa_3_00 (have_isa >= tcg_isa_3_00) |
24 | # define P_GS 0x8000 /* gs segment override */ | 30 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; |
25 | #else | 31 | * instruction and substituting two 32-bit stores makes the generated |
26 | -# define P_ADDR32 0 | 32 | * code quite large. |
27 | # define P_REXW 0 | 33 | */ |
28 | # define P_REXB_R 0 | 34 | -#define TCG_TARGET_HAS_v64 0 |
29 | # define P_REXB_RM 0 | 35 | +#define TCG_TARGET_HAS_v64 have_vsx |
30 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x) | 36 | #define TCG_TARGET_HAS_v128 have_altivec |
31 | tcg_debug_assert((opc & P_REXW) == 0); | 37 | #define TCG_TARGET_HAS_v256 0 |
32 | tcg_out8(s, 0x66); | 38 | |
39 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | ||
40 | #define TCG_TARGET_HAS_mul_vec 1 | ||
41 | #define TCG_TARGET_HAS_sat_vec 1 | ||
42 | #define TCG_TARGET_HAS_minmax_vec 1 | ||
43 | -#define TCG_TARGET_HAS_bitsel_vec 0 | ||
44 | +#define TCG_TARGET_HAS_bitsel_vec have_vsx | ||
45 | #define TCG_TARGET_HAS_cmpsel_vec 0 | ||
46 | |||
47 | void flush_icache_range(uintptr_t start, uintptr_t stop); | ||
48 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/tcg/ppc/tcg-target.inc.c | ||
51 | +++ b/tcg/ppc/tcg-target.inc.c | ||
52 | @@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr; | ||
53 | TCGPowerISA have_isa; | ||
54 | static bool have_isel; | ||
55 | bool have_altivec; | ||
56 | +bool have_vsx; | ||
57 | |||
58 | #ifndef CONFIG_SOFTMMU | ||
59 | #define TCG_GUEST_BASE_REG 30 | ||
60 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
61 | #define LVEBX XO31(7) | ||
62 | #define LVEHX XO31(39) | ||
63 | #define LVEWX XO31(71) | ||
64 | +#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */ | ||
65 | +#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */ | ||
66 | |||
67 | #define STVX XO31(231) | ||
68 | #define STVEWX XO31(199) | ||
69 | +#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */ | ||
70 | |||
71 | #define VADDSBS VX4(768) | ||
72 | #define VADDUBS VX4(512) | ||
73 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
74 | |||
75 | #define VSLDOI VX4(44) | ||
76 | |||
77 | +#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */ | ||
78 | +#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */ | ||
79 | + | ||
80 | #define RT(r) ((r)<<21) | ||
81 | #define RS(r) ((r)<<21) | ||
82 | #define RA(r) ((r)<<16) | ||
83 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, | ||
84 | add = 0; | ||
33 | } | 85 | } |
34 | - if (opc & P_ADDR32) { | 86 | |
35 | - tcg_out8(s, 0x67); | 87 | - load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); |
36 | - } | 88 | - if (TCG_TARGET_REG_BITS == 64) { |
37 | if (opc & P_SIMDF3) { | 89 | - new_pool_l2(s, rel, s->code_ptr, add, val, val); |
38 | tcg_out8(s, 0xf3); | 90 | + if (have_vsx) { |
39 | } else if (opc & P_SIMDF2) { | 91 | + load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX; |
40 | @@ -XXX,XX +XXX,XX @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, | 92 | + load_insn |= VRT(ret) | RB(TCG_REG_TMP1); |
41 | tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0); | 93 | + if (TCG_TARGET_REG_BITS == 64) { |
42 | 94 | + new_pool_label(s, val, rel, s->code_ptr, add); | |
43 | /* Prepare for both the fast path add of the tlb addend, and the slow | 95 | + } else { |
44 | - path function argument setup. There are two cases worth note: | 96 | + new_pool_l2(s, rel, s->code_ptr, add, val, val); |
45 | - For 32-bit guest and x86_64 host, MOVL zero-extends the guest address | 97 | + } |
46 | - before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ | 98 | } else { |
47 | - copies the entire guest address for the slow path, while truncation | 99 | - new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val); |
48 | - for the 32-bit host happens with the fastpath ADDL below. */ | 100 | + load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); |
49 | + path function argument setup. */ | 101 | + if (TCG_TARGET_REG_BITS == 64) { |
50 | tcg_out_mov(s, ttype, r1, addrlo); | 102 | + new_pool_l2(s, rel, s->code_ptr, add, val, val); |
51 | 103 | + } else { | |
52 | /* jne slow_path */ | 104 | + new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val); |
53 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) | 105 | + } |
54 | #else | ||
55 | { | ||
56 | int32_t offset = guest_base; | ||
57 | - TCGReg base = addrlo; | ||
58 | int index = -1; | ||
59 | int seg = 0; | ||
60 | |||
61 | - /* For a 32-bit guest, the high 32 bits may contain garbage. | ||
62 | - We can do this with the ADDR32 prefix if we're not using | ||
63 | - a guest base, or when using segmentation. Otherwise we | ||
64 | - need to zero-extend manually. */ | ||
65 | + /* | ||
66 | + * Recall we store 32-bit values zero-extended. No need for | ||
67 | + * further manual extension or an addr32 (0x67) prefix. | ||
68 | + */ | ||
69 | if (guest_base == 0 || guest_base_flags) { | ||
70 | seg = guest_base_flags; | ||
71 | offset = 0; | ||
72 | - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { | ||
73 | - seg |= P_ADDR32; | ||
74 | - } | ||
75 | - } else if (TCG_TARGET_REG_BITS == 64) { | ||
76 | - if (TARGET_LONG_BITS == 32) { | ||
77 | - tcg_out_ext32u(s, TCG_REG_L0, base); | ||
78 | - base = TCG_REG_L0; | ||
79 | - } | ||
80 | - if (offset != guest_base) { | ||
81 | - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); | ||
82 | - index = TCG_REG_L1; | ||
83 | - offset = 0; | ||
84 | - } | ||
85 | + } else if (TCG_TARGET_REG_BITS == 64 && offset != guest_base) { | ||
86 | + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); | ||
87 | + index = TCG_REG_L1; | ||
88 | + offset = 0; | ||
89 | } | ||
90 | |||
91 | tcg_out_qemu_ld_direct(s, datalo, datahi, | ||
92 | - base, index, offset, seg, is64, opc); | ||
93 | + addrlo, index, offset, seg, is64, opc); | ||
94 | } | 106 | } |
95 | #endif | 107 | |
96 | } | 108 | if (USE_REG_TB) { |
97 | 109 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | |
98 | static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, | 110 | /* fallthru */ |
99 | - TCGReg base, intptr_t ofs, int seg, | 111 | case TCG_TYPE_V64: |
100 | - TCGMemOp memop) | 112 | tcg_debug_assert(ret >= TCG_REG_V0); |
101 | + TCGReg base, int index, intptr_t ofs, | 113 | + if (have_vsx) { |
102 | + int seg, TCGMemOp memop) | 114 | + tcg_out_mem_long(s, 0, LXSDX, ret, base, offset); |
103 | { | 115 | + break; |
104 | /* ??? Ideally we wouldn't need a scratch register. For user-only, | 116 | + } |
105 | we could perform the bswap twice to restore the original value | 117 | tcg_debug_assert((offset & 7) == 0); |
106 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, | 118 | tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16); |
107 | tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); | 119 | if (offset & 8) { |
108 | datalo = scratch; | 120 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, |
109 | } | 121 | /* fallthru */ |
110 | - tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg, | 122 | case TCG_TYPE_V64: |
111 | - datalo, base, ofs); | 123 | tcg_debug_assert(arg >= TCG_REG_V0); |
112 | + tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg, | 124 | + if (have_vsx) { |
113 | + datalo, base, index, 0, ofs); | 125 | + tcg_out_mem_long(s, 0, STXSDX, arg, base, offset); |
114 | break; | 126 | + break; |
115 | case MO_16: | 127 | + } |
116 | if (bswap) { | 128 | tcg_debug_assert((offset & 7) == 0); |
117 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, | 129 | if (offset & 8) { |
118 | tcg_out_rolw_8(s, scratch); | 130 | tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8); |
119 | datalo = scratch; | 131 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) |
120 | } | 132 | case INDEX_op_shri_vec: |
121 | - tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs); | 133 | case INDEX_op_sari_vec: |
122 | + tcg_out_modrm_sib_offset(s, movop + P_DATA16 + seg, datalo, | 134 | return vece <= MO_32 ? -1 : 0; |
123 | + base, index, 0, ofs); | 135 | + case INDEX_op_bitsel_vec: |
124 | break; | 136 | + return have_vsx; |
125 | case MO_32: | 137 | default: |
126 | if (bswap) { | 138 | return 0; |
127 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, | 139 | } |
128 | tcg_out_bswap32(s, scratch); | 140 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, |
129 | datalo = scratch; | 141 | tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16)); |
130 | } | ||
131 | - tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); | ||
132 | + tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs); | ||
133 | break; | 142 | break; |
134 | case MO_64: | 143 | case MO_64: |
135 | if (TCG_TARGET_REG_BITS == 64) { | 144 | + if (have_vsx) { |
136 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, | 145 | + tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src)); |
137 | tcg_out_bswap64(s, scratch); | 146 | + break; |
138 | datalo = scratch; | 147 | + } |
139 | } | 148 | tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8); |
140 | - tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs); | 149 | tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8); |
141 | + tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo, | 150 | break; |
142 | + base, index, 0, ofs); | 151 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, |
143 | } else if (bswap) { | 152 | tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16)); |
144 | tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi); | 153 | break; |
145 | tcg_out_bswap32(s, scratch); | 154 | case MO_64: |
146 | - tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs); | 155 | + if (have_vsx) { |
147 | + tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch, | 156 | + tcg_out_mem_long(s, 0, LXVDSX, out, base, offset); |
148 | + base, index, 0, ofs); | 157 | + break; |
149 | tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); | 158 | + } |
150 | tcg_out_bswap32(s, scratch); | 159 | tcg_debug_assert((offset & 7) == 0); |
151 | - tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4); | 160 | tcg_out_mem_long(s, 0, LVX, out, base, offset & -16); |
152 | + tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch, | 161 | tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8); |
153 | + base, index, 0, ofs + 4); | 162 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, |
154 | } else { | ||
155 | if (real_bswap) { | ||
156 | int t = datalo; | ||
157 | datalo = datahi; | ||
158 | datahi = t; | ||
159 | } | ||
160 | - tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); | ||
161 | - tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4); | ||
162 | + tcg_out_modrm_sib_offset(s, movop + seg, datalo, | ||
163 | + base, index, 0, ofs); | ||
164 | + tcg_out_modrm_sib_offset(s, movop + seg, datahi, | ||
165 | + base, index, 0, ofs + 4); | ||
166 | } | 163 | } |
167 | break; | 164 | break; |
168 | default: | 165 | |
169 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) | 166 | + case INDEX_op_bitsel_vec: |
170 | label_ptr, offsetof(CPUTLBEntry, addr_write)); | 167 | + tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3])); |
171 | 168 | + return; | |
172 | /* TLB Hit. */ | 169 | + |
173 | - tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); | 170 | case INDEX_op_dup2_vec: |
174 | + tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc); | 171 | assert(TCG_TARGET_REG_BITS == 32); |
175 | 172 | /* With inputs a1 = xLxx, a2 = xHxx */ | |
176 | /* Record the current context of a store into ldst label */ | 173 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) |
177 | add_qemu_ldst_label(s, false, is64, oi, datalo, datahi, addrlo, addrhi, | 174 | case INDEX_op_st_vec: |
178 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) | 175 | case INDEX_op_dupm_vec: |
179 | #else | 176 | return &v_r; |
180 | { | 177 | + case INDEX_op_bitsel_vec: |
181 | int32_t offset = guest_base; | 178 | case INDEX_op_ppc_msum_vec: |
182 | - TCGReg base = addrlo; | 179 | return &v_v_v_v; |
183 | + int index = -1; | 180 | |
184 | int seg = 0; | 181 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) |
185 | 182 | ||
186 | - /* See comment in tcg_out_qemu_ld re zero-extension of addrlo. */ | 183 | if (hwcap & PPC_FEATURE_HAS_ALTIVEC) { |
187 | + /* | 184 | have_altivec = true; |
188 | + * Recall we store 32-bit values zero-extended. No need for | 185 | + /* We only care about the portion of VSX that overlaps Altivec. */ |
189 | + * further manual extension or an addr32 (0x67) prefix. | 186 | + if (hwcap & PPC_FEATURE_HAS_VSX) { |
190 | + */ | 187 | + have_vsx = true; |
191 | if (guest_base == 0 || guest_base_flags) { | 188 | + } |
192 | seg = guest_base_flags; | ||
193 | offset = 0; | ||
194 | - if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { | ||
195 | - seg |= P_ADDR32; | ||
196 | - } | ||
197 | - } else if (TCG_TARGET_REG_BITS == 64) { | ||
198 | - /* ??? Note that we can't use the same SIB addressing scheme | ||
199 | - as for loads, since we require L0 free for bswap. */ | ||
200 | - if (offset != guest_base) { | ||
201 | - if (TARGET_LONG_BITS == 32) { | ||
202 | - tcg_out_ext32u(s, TCG_REG_L0, base); | ||
203 | - base = TCG_REG_L0; | ||
204 | - } | ||
205 | - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); | ||
206 | - tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base); | ||
207 | - base = TCG_REG_L1; | ||
208 | - offset = 0; | ||
209 | - } else if (TARGET_LONG_BITS == 32) { | ||
210 | - tcg_out_ext32u(s, TCG_REG_L1, base); | ||
211 | - base = TCG_REG_L1; | ||
212 | - } | ||
213 | + } else if (TCG_TARGET_REG_BITS == 64 && offset != guest_base) { | ||
214 | + /* ??? Note that we require L0 free for bswap. */ | ||
215 | + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); | ||
216 | + index = TCG_REG_L1; | ||
217 | + offset = 0; | ||
218 | } | ||
219 | |||
220 | - tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc); | ||
221 | + tcg_out_qemu_st_direct(s, datalo, datahi, | ||
222 | + addrlo, index, offset, seg, opc); | ||
223 | } | 189 | } |
224 | #endif | 190 | |
225 | } | 191 | tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; |
226 | -- | 192 | -- |
227 | 2.17.2 | 193 | 2.17.1 |
228 | 194 | ||
229 | 195 | diff view generated by jsdifflib |
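The i386 patch above leans on an architectural guarantee: on x86-64, any 32-bit operation zeroes bits 63:32 of its destination register, so a guest address kept in zero-extended form needs neither an addr32 (0x67) prefix nor an explicit widening insn. The invariant, as a trivial C sketch:

    #include <stdint.h>

    /* 'Stored zero-extended' means the full host register already equals
       the 32-bit guest address, so this truncation is a no-op. */
    static uint64_t guest_addr_model(uint64_t host_reg)
    {
        return (uint32_t)host_reg;     /* what e.g. movl does implicitly */
    }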
1 | This does require two extra checks within the slow paths | 1 | These new instructions are conditional only on MSR.VEC and |
---|---|---|---|
2 | to replace the assert that we're moving. | 2 | are thus part of the Altivec instruction set, and not VSX. |
3 | 3 | This includes lots of double-word arithmetic and a few extra | |
4 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 4 | logical operations. |
5 | |||
6 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | --- | 8 | --- |
7 | tcg/arm/tcg-target.inc.c | 22 ++++++++++++++++------ | 9 | tcg/ppc/tcg-target.h | 4 +- |
8 | 1 file changed, 16 insertions(+), 6 deletions(-) | 10 | tcg/ppc/tcg-target.inc.c | 85 ++++++++++++++++++++++++++++++---------- |
9 | 11 | 2 files changed, 67 insertions(+), 22 deletions(-) | |
10 | diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c | 12 | |
13 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | ||
11 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/tcg/arm/tcg-target.inc.c | 15 | --- a/tcg/ppc/tcg-target.h |
13 | +++ b/tcg/arm/tcg-target.inc.c | 16 | +++ b/tcg/ppc/tcg-target.h |
14 | @@ -XXX,XX +XXX,XX @@ static const uint8_t tcg_cond_to_arm_cond[] = { | 17 | @@ -XXX,XX +XXX,XX @@ typedef enum { |
15 | [TCG_COND_GTU] = COND_HI, | 18 | typedef enum { |
16 | }; | 19 | tcg_isa_base, |
17 | 20 | tcg_isa_2_06, | |
18 | -static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target) | 21 | + tcg_isa_2_07, |
19 | +static inline bool reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target) | 22 | tcg_isa_3_00, |
23 | } TCGPowerISA; | ||
24 | |||
25 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | ||
26 | extern bool have_vsx; | ||
27 | |||
28 | #define have_isa_2_06 (have_isa >= tcg_isa_2_06) | ||
29 | +#define have_isa_2_07 (have_isa >= tcg_isa_2_07) | ||
30 | #define have_isa_3_00 (have_isa >= tcg_isa_3_00) | ||
31 | |||
32 | /* optional instructions automatically implemented */ | ||
33 | @@ -XXX,XX +XXX,XX @@ extern bool have_vsx; | ||
34 | #define TCG_TARGET_HAS_v256 0 | ||
35 | |||
36 | #define TCG_TARGET_HAS_andc_vec 1 | ||
37 | -#define TCG_TARGET_HAS_orc_vec 0 | ||
38 | +#define TCG_TARGET_HAS_orc_vec have_isa_2_07 | ||
39 | #define TCG_TARGET_HAS_not_vec 1 | ||
40 | #define TCG_TARGET_HAS_neg_vec 0 | ||
41 | #define TCG_TARGET_HAS_abs_vec 0 | ||
42 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/tcg/ppc/tcg-target.inc.c | ||
45 | +++ b/tcg/ppc/tcg-target.inc.c | ||
46 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
47 | #define VADDSWS VX4(896) | ||
48 | #define VADDUWS VX4(640) | ||
49 | #define VADDUWM VX4(128) | ||
50 | +#define VADDUDM VX4(192) /* v2.07 */ | ||
51 | |||
52 | #define VSUBSBS VX4(1792) | ||
53 | #define VSUBUBS VX4(1536) | ||
54 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
55 | #define VSUBSWS VX4(1920) | ||
56 | #define VSUBUWS VX4(1664) | ||
57 | #define VSUBUWM VX4(1152) | ||
58 | +#define VSUBUDM VX4(1216) /* v2.07 */ | ||
59 | |||
60 | #define VMAXSB VX4(258) | ||
61 | #define VMAXSH VX4(322) | ||
62 | #define VMAXSW VX4(386) | ||
63 | +#define VMAXSD VX4(450) /* v2.07 */ | ||
64 | #define VMAXUB VX4(2) | ||
65 | #define VMAXUH VX4(66) | ||
66 | #define VMAXUW VX4(130) | ||
67 | +#define VMAXUD VX4(194) /* v2.07 */ | ||
68 | #define VMINSB VX4(770) | ||
69 | #define VMINSH VX4(834) | ||
70 | #define VMINSW VX4(898) | ||
71 | +#define VMINSD VX4(962) /* v2.07 */ | ||
72 | #define VMINUB VX4(514) | ||
73 | #define VMINUH VX4(578) | ||
74 | #define VMINUW VX4(642) | ||
75 | +#define VMINUD VX4(706) /* v2.07 */ | ||
76 | |||
77 | #define VCMPEQUB VX4(6) | ||
78 | #define VCMPEQUH VX4(70) | ||
79 | #define VCMPEQUW VX4(134) | ||
80 | +#define VCMPEQUD VX4(199) /* v2.07 */ | ||
81 | #define VCMPGTSB VX4(774) | ||
82 | #define VCMPGTSH VX4(838) | ||
83 | #define VCMPGTSW VX4(902) | ||
84 | +#define VCMPGTSD VX4(967) /* v2.07 */ | ||
85 | #define VCMPGTUB VX4(518) | ||
86 | #define VCMPGTUH VX4(582) | ||
87 | #define VCMPGTUW VX4(646) | ||
88 | +#define VCMPGTUD VX4(711) /* v2.07 */ | ||
89 | |||
90 | #define VSLB VX4(260) | ||
91 | #define VSLH VX4(324) | ||
92 | #define VSLW VX4(388) | ||
93 | +#define VSLD VX4(1476) /* v2.07 */ | ||
94 | #define VSRB VX4(516) | ||
95 | #define VSRH VX4(580) | ||
96 | #define VSRW VX4(644) | ||
97 | +#define VSRD VX4(1732) /* v2.07 */ | ||
98 | #define VSRAB VX4(772) | ||
99 | #define VSRAH VX4(836) | ||
100 | #define VSRAW VX4(900) | ||
101 | +#define VSRAD VX4(964) /* v2.07 */ | ||
102 | #define VRLB VX4(4) | ||
103 | #define VRLH VX4(68) | ||
104 | #define VRLW VX4(132) | ||
105 | +#define VRLD VX4(196) /* v2.07 */ | ||
106 | |||
107 | #define VMULEUB VX4(520) | ||
108 | #define VMULEUH VX4(584) | ||
109 | +#define VMULEUW VX4(648) /* v2.07 */ | ||
110 | #define VMULOUB VX4(8) | ||
111 | #define VMULOUH VX4(72) | ||
112 | +#define VMULOUW VX4(136) /* v2.07 */ | ||
113 | +#define VMULUWM VX4(137) /* v2.07 */ | ||
114 | #define VMSUMUHM VX4(38) | ||
115 | |||
116 | #define VMRGHB VX4(12) | ||
117 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
118 | #define VNOR VX4(1284) | ||
119 | #define VOR VX4(1156) | ||
120 | #define VXOR VX4(1220) | ||
121 | +#define VEQV VX4(1668) /* v2.07 */ | ||
122 | +#define VNAND VX4(1412) /* v2.07 */ | ||
123 | +#define VORC VX4(1348) /* v2.07 */ | ||
124 | |||
125 | #define VSPLTB VX4(524) | ||
126 | #define VSPLTH VX4(588) | ||
127 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
128 | case INDEX_op_andc_vec: | ||
129 | case INDEX_op_not_vec: | ||
130 | return 1; | ||
131 | + case INDEX_op_orc_vec: | ||
132 | + return have_isa_2_07; | ||
133 | case INDEX_op_add_vec: | ||
134 | case INDEX_op_sub_vec: | ||
135 | case INDEX_op_smax_vec: | ||
136 | case INDEX_op_smin_vec: | ||
137 | case INDEX_op_umax_vec: | ||
138 | case INDEX_op_umin_vec: | ||
139 | + case INDEX_op_shlv_vec: | ||
140 | + case INDEX_op_shrv_vec: | ||
141 | + case INDEX_op_sarv_vec: | ||
142 | + return vece <= MO_32 || have_isa_2_07; | ||
143 | case INDEX_op_ssadd_vec: | ||
144 | case INDEX_op_sssub_vec: | ||
145 | case INDEX_op_usadd_vec: | ||
146 | case INDEX_op_ussub_vec: | ||
147 | - case INDEX_op_shlv_vec: | ||
148 | - case INDEX_op_shrv_vec: | ||
149 | - case INDEX_op_sarv_vec: | ||
150 | return vece <= MO_32; | ||
151 | case INDEX_op_cmp_vec: | ||
152 | - case INDEX_op_mul_vec: | ||
153 | case INDEX_op_shli_vec: | ||
154 | case INDEX_op_shri_vec: | ||
155 | case INDEX_op_sari_vec: | ||
156 | - return vece <= MO_32 ? -1 : 0; | ||
157 | + return vece <= MO_32 || have_isa_2_07 ? -1 : 0; | ||
158 | + case INDEX_op_mul_vec: | ||
159 | + switch (vece) { | ||
160 | + case MO_8: | ||
161 | + case MO_16: | ||
162 | + return -1; | ||
163 | + case MO_32: | ||
164 | + return have_isa_2_07 ? 1 : -1; | ||
165 | + } | ||
166 | + return 0; | ||
167 | case INDEX_op_bitsel_vec: | ||
168 | return have_vsx; | ||
169 | default: | ||
170 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
171 | const TCGArg *args, const int *const_args) | ||
20 | { | 172 | { |
21 | ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2; | 173 | static const uint32_t |
22 | - *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff); | 174 | - add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 }, |
23 | + if (offset == sextract32(offset, 0, 24)) { | 175 | - sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 }, |
24 | + *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff); | 176 | - eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, |
25 | + return true; | 177 | - gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, |
178 | - gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }, | ||
179 | + add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, | ||
180 | + sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, | ||
181 | + eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, | ||
182 | + gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD }, | ||
183 | + gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD }, | ||
184 | ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, | ||
185 | usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 }, | ||
186 | sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 }, | ||
187 | ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 }, | ||
188 | - umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 }, | ||
189 | - smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 }, | ||
190 | - umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 }, | ||
191 | - smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }, | ||
192 | - shlv_op[4] = { VSLB, VSLH, VSLW, 0 }, | ||
193 | - shrv_op[4] = { VSRB, VSRH, VSRW, 0 }, | ||
194 | - sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 }, | ||
195 | + umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD }, | ||
196 | + smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD }, | ||
197 | + umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD }, | ||
198 | + smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD }, | ||
199 | + shlv_op[4] = { VSLB, VSLH, VSLW, VSLD }, | ||
200 | + shrv_op[4] = { VSRB, VSRH, VSRW, VSRD }, | ||
201 | + sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD }, | ||
202 | mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 }, | ||
203 | mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 }, | ||
204 | - muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 }, | ||
205 | - mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 }, | ||
206 | + muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 }, | ||
207 | + mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 }, | ||
208 | pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 }, | ||
209 | - rotl_op[4] = { VRLB, VRLH, VRLW, 0 }; | ||
210 | + rotl_op[4] = { VRLB, VRLH, VRLW, VRLD }; | ||
211 | |||
212 | TCGType type = vecl + TCG_TYPE_V64; | ||
213 | TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | ||
214 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
215 | case INDEX_op_sub_vec: | ||
216 | insn = sub_op[vece]; | ||
217 | break; | ||
218 | + case INDEX_op_mul_vec: | ||
219 | + tcg_debug_assert(vece == MO_32 && have_isa_2_07); | ||
220 | + insn = VMULUWM; | ||
221 | + break; | ||
222 | case INDEX_op_ssadd_vec: | ||
223 | insn = ssadd_op[vece]; | ||
224 | break; | ||
225 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
226 | insn = VNOR; | ||
227 | a2 = a1; | ||
228 | break; | ||
229 | + case INDEX_op_orc_vec: | ||
230 | + insn = VORC; | ||
231 | + break; | ||
232 | |||
233 | case INDEX_op_cmp_vec: | ||
234 | switch (args[3]) { | ||
235 | @@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | ||
236 | { | ||
237 | bool need_swap = false, need_inv = false; | ||
238 | |||
239 | - tcg_debug_assert(vece <= MO_32); | ||
240 | + tcg_debug_assert(vece <= MO_32 || have_isa_2_07); | ||
241 | |||
242 | switch (cond) { | ||
243 | case TCG_COND_EQ: | ||
244 | @@ -XXX,XX +XXX,XX @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0, | ||
245 | break; | ||
246 | |||
247 | case MO_32: | ||
248 | + tcg_debug_assert(!have_isa_2_07); | ||
249 | t3 = tcg_temp_new_vec(type); | ||
250 | t4 = tcg_temp_new_vec(type); | ||
251 | tcg_gen_dupi_vec(MO_8, t4, -16); | ||
252 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
253 | if (hwcap & PPC_FEATURE_ARCH_2_06) { | ||
254 | have_isa = tcg_isa_2_06; | ||
255 | } | ||
256 | +#ifdef PPC_FEATURE2_ARCH_2_07 | ||
257 | + if (hwcap2 & PPC_FEATURE2_ARCH_2_07) { | ||
258 | + have_isa = tcg_isa_2_07; | ||
26 | + } | 259 | + } |
27 | + return false; | 260 | +#endif |
28 | } | 261 | #ifdef PPC_FEATURE2_ARCH_3_00 |
29 | 262 | if (hwcap2 & PPC_FEATURE2_ARCH_3_00) { | |
30 | static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | 263 | have_isa = tcg_isa_3_00; |
31 | @@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
32 | tcg_debug_assert(addend == 0); | ||
33 | |||
34 | if (type == R_ARM_PC24) { | ||
35 | - reloc_pc24(code_ptr, (tcg_insn_unit *)value); | ||
36 | + return reloc_pc24(code_ptr, (tcg_insn_unit *)value); | ||
37 | } else if (type == R_ARM_PC13) { | ||
38 | intptr_t diff = value - (uintptr_t)(code_ptr + 2); | ||
39 | tcg_insn_unit insn = *code_ptr; | ||
40 | @@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
41 | } else { | ||
42 | int rd = extract32(insn, 12, 4); | ||
43 | int rt = rd == TCG_REG_PC ? TCG_REG_TMP : rd; | ||
44 | - assert(diff >= 0x1000 && diff < 0x100000); | ||
45 | + | ||
46 | + if (diff < 0x1000 || diff >= 0x100000) { | ||
47 | + return false; | ||
48 | + } | ||
49 | + | ||
50 | /* add rt, pc, #high */ | ||
51 | *code_ptr++ = ((insn & 0xf0000000) | (1 << 25) | ARITH_ADD | ||
52 | | (TCG_REG_PC << 16) | (rt << 12) | ||
53 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
54 | TCGMemOp opc = get_memop(oi); | ||
55 | void *func; | ||
56 | |||
57 | - reloc_pc24(lb->label_ptr[0], s->code_ptr); | ||
58 | + bool ok = reloc_pc24(lb->label_ptr[0], s->code_ptr); | ||
59 | + tcg_debug_assert(ok); | ||
60 | |||
61 | argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0); | ||
62 | if (TARGET_LONG_BITS == 64) { | ||
63 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
64 | TCGMemOpIdx oi = lb->oi; | ||
65 | TCGMemOp opc = get_memop(oi); | ||
66 | |||
67 | - reloc_pc24(lb->label_ptr[0], s->code_ptr); | ||
68 | + bool ok = reloc_pc24(lb->label_ptr[0], s->code_ptr); | ||
69 | + tcg_debug_assert(ok); | ||
70 | |||
71 | argreg = TCG_REG_R0; | ||
72 | argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); | ||
73 | -- | 264 | -- |
74 | 2.17.2 | 265 | 2.17.1 |
75 | 266 | ||
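The left-hand patch replaces an unconditional patch with a range check that reports failure. In isolation, the 24-bit ARM branch case looks roughly like the sketch below (helper names hypothetical; assumes the usual PC+8 pipeline bias of ARM B/BL):

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Sign-extend the low @len bits of @value, like QEMU's sextract32(). */
static int32_t sext(int32_t value, int len)
{
    return (int32_t)((uint32_t)value << (32 - len)) >> (32 - len);
}

/* ARM B/BL carries a signed 24-bit word offset relative to PC + 8. */
static bool patch_pc24(uint32_t *insn, const void *target)
{
    ptrdiff_t byte_diff = (const char *)target - (const char *)insn - 8;
    int32_t offset = (int32_t)(byte_diff >> 2);

    if (offset != sext(offset, 24)) {
        return false;   /* out of range: report it instead of aborting */
    }
    *insn = (*insn & ~0xffffffu) | ((uint32_t)offset & 0xffffff);
    return true;
}
```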
1 | From: Alistair Francis <Alistair.Francis@wdc.com> | 1 | These new instructions are conditional only on MSR.VSX and |
---|---|---|---|
2 | are thus part of the VSX instruction set, and not Altivec. | ||
3 | This includes double-word loads and stores. | ||
2 | 4 | ||
3 | Instead of hard-coding 31 for the shift right, use TCG_TARGET_REG_BITS - 1. | 5 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> |
4 | |||
5 | Signed-off-by: Alistair Francis <alistair.francis@wdc.com> | ||
6 | Message-Id: <7dfbddf7014a595150aa79011ddb342c3cc17ec3.1544648105.git.alistair.francis@wdc.com> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
9 | --- | 7 | --- |
10 | tcg/mips/tcg-target.inc.c | 2 +- | 8 | tcg/ppc/tcg-target.inc.c | 11 +++++++++++ |
11 | 1 file changed, 1 insertion(+), 1 deletion(-) | 9 | 1 file changed, 11 insertions(+) |
12 | 10 | ||
13 | diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c | 11 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c |
14 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/tcg/mips/tcg-target.inc.c | 13 | --- a/tcg/ppc/tcg-target.inc.c |
16 | +++ b/tcg/mips/tcg-target.inc.c | 14 | +++ b/tcg/ppc/tcg-target.inc.c |
17 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al, | 15 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, |
18 | tcg_out_opc_imm(s, OPC_ADDIU, rl, al, bl); | 16 | #define LVEWX XO31(71) |
19 | tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, rl, bl); | 17 | #define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */ |
20 | } else if (rl == al && rl == bl) { | 18 | #define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */ |
21 | - tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, al, 31); | 19 | +#define LXSIWZX (XO31(12) | 1) /* v2.07, force tx=1 */ |
22 | + tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, al, TCG_TARGET_REG_BITS - 1); | 20 | |
23 | tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); | 21 | #define STVX XO31(231) |
24 | } else { | 22 | #define STVEWX XO31(199) |
25 | tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl); | 23 | #define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */ |
24 | +#define STXSIWX (XO31(140) | 1) /* v2.07, force sx=1 */ | ||
25 | |||
26 | #define VADDSBS VX4(768) | ||
27 | #define VADDUBS VX4(512) | ||
28 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | ||
29 | tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset); | ||
30 | break; | ||
31 | } | ||
32 | + if (have_isa_2_07 && have_vsx) { | ||
33 | + tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset); | ||
34 | + break; | ||
35 | + } | ||
36 | tcg_debug_assert((offset & 3) == 0); | ||
37 | tcg_out_mem_long(s, 0, LVEWX, ret, base, offset); | ||
38 | shift = (offset - 4) & 0xc; | ||
39 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | ||
40 | tcg_out_mem_long(s, STW, STWX, arg, base, offset); | ||
41 | break; | ||
42 | } | ||
43 | + if (have_isa_2_07 && have_vsx) { | ||
44 | + tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset); | ||
45 | + break; | ||
46 | + } | ||
48 | tcg_debug_assert((offset & 3) == 0); | ||
49 | shift = (offset - 4) & 0xc; | ||
50 | if (shift) { | ||
26 | -- | 51 | -- |
27 | 2.17.2 | 52 | 2.17.1 |
28 | 53 | ||
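Why the shift right by TCG_TARGET_REG_BITS - 1 is correct in the rl == al && rl == bl case above: when a register is added to itself, the carry-out is exactly the bit shifted out of the top, so a logical shift by width minus one recovers it before the destination is clobbered. A tiny illustration for a 64-bit register (hypothetical helper):

```c
#include <assert.h>
#include <stdint.h>

/* Carry-out of x + x is the old most-significant bit of x. */
static uint64_t carry_of_doubling(uint64_t x)
{
    return x >> 63;    /* TCG_TARGET_REG_BITS - 1 for a 64-bit register */
}

int main(void)
{
    assert(carry_of_doubling(0x8000000000000000ull) == 1);
    assert(carry_of_doubling(0x7fffffffffffffffull) == 0);
    return 0;
}
```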
1 | The reloc_pc{14,24}_val routines retain their asserts. | 1 | These new instructions are conditional on MSR.FP when TX=0 and |
---|---|---|---|
2 | Use these directly within the slow paths. | 2 | MSR.VEC when TX=1. Since we only care about the Altivec registers, |
3 | and force TX=1, we can consider these to be Altivec instructions. | ||
4 | Since Altivec is true for any use of vector types, we only need | ||
5 | test have_isa_2_07. | ||
3 | 6 | ||
4 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 7 | This includes moves to and from the integer registers. |
8 | |||
9 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | --- | 11 | --- |
7 | tcg/ppc/tcg-target.inc.c | 32 +++++++++++++++++++++----------- | 12 | tcg/ppc/tcg-target.inc.c | 32 ++++++++++++++++++++++++++------ |
8 | 1 file changed, 21 insertions(+), 11 deletions(-) | 13 | 1 file changed, 26 insertions(+), 6 deletions(-) |
9 | 14 | ||
10 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 15 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c |
11 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/tcg/ppc/tcg-target.inc.c | 17 | --- a/tcg/ppc/tcg-target.inc.c |
13 | +++ b/tcg/ppc/tcg-target.inc.c | 18 | +++ b/tcg/ppc/tcg-target.inc.c |
14 | @@ -XXX,XX +XXX,XX @@ static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target) | 19 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, |
15 | return disp & 0x3fffffc; | 20 | #define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */ |
16 | } | 21 | #define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */ |
17 | 22 | ||
18 | -static void reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target) | 23 | +#define MFVSRD (XO31(51) | 1) /* v2.07, force sx=1 */ |
19 | +static bool reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target) | 24 | +#define MFVSRWZ (XO31(115) | 1) /* v2.07, force sx=1 */ |
20 | { | 25 | +#define MTVSRD (XO31(179) | 1) /* v2.07, force tx=1 */ |
21 | - *pc = (*pc & ~0x3fffffc) | reloc_pc24_val(pc, target); | 26 | +#define MTVSRWZ (XO31(243) | 1) /* v2.07, force tx=1 */ |
22 | + ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); | 27 | + |
23 | + if (in_range_b(disp)) { | 28 | #define RT(r) ((r)<<21) |
24 | + *pc = (*pc & ~0x3fffffc) | (disp & 0x3fffffc); | 29 | #define RS(r) ((r)<<21) |
25 | + return true; | 30 | #define RA(r) ((r)<<16) |
26 | + } | 31 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) |
27 | + return false; | 32 | tcg_debug_assert(TCG_TARGET_REG_BITS == 64); |
28 | } | 33 | /* fallthru */ |
29 | 34 | case TCG_TYPE_I32: | |
30 | static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target) | 35 | - if (ret < TCG_REG_V0 && arg < TCG_REG_V0) { |
31 | @@ -XXX,XX +XXX,XX @@ static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target) | 36 | - tcg_out32(s, OR | SAB(arg, ret, arg)); |
32 | return disp & 0xfffc; | 37 | - break; |
33 | } | 38 | - } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) { |
34 | 39 | - /* Altivec does not support vector/integer moves. */ | |
35 | -static void reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target) | 40 | - return false; |
36 | +static bool reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target) | 41 | + if (ret < TCG_REG_V0) { |
37 | { | 42 | + if (arg < TCG_REG_V0) { |
38 | - *pc = (*pc & ~0xfffc) | reloc_pc14_val(pc, target); | 43 | + tcg_out32(s, OR | SAB(arg, ret, arg)); |
39 | + ptrdiff_t disp = tcg_ptr_byte_diff(target, pc); | 44 | + break; |
40 | + if (disp == (int16_t) disp) { | 45 | + } else if (have_isa_2_07) { |
41 | + *pc = (*pc & ~0xfffc) | (disp & 0xfffc); | 46 | + tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD) |
42 | + return true; | 47 | + | VRT(arg) | RA(ret)); |
43 | + } | 48 | + break; |
44 | + return false; | 49 | + } else { |
45 | } | 50 | + /* Altivec does not support vector->integer moves. */ |
46 | |||
47 | /* parse target specific constraints */ | ||
48 | @@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
49 | |||
50 | switch (type) { | ||
51 | case R_PPC_REL14: | ||
52 | - reloc_pc14(code_ptr, target); | ||
53 | - break; | ||
54 | + return reloc_pc14(code_ptr, target); | ||
55 | case R_PPC_REL24: | ||
56 | - reloc_pc24(code_ptr, target); | ||
57 | - break; | ||
58 | + return reloc_pc24(code_ptr, target); | ||
59 | case R_PPC_ADDR16: | ||
60 | /* We are abusing this relocation type. This points to a pair | ||
61 | of insns, addis + load. If the displacement is small, we | ||
62 | @@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
63 | } else { | ||
64 | int16_t lo = value; | ||
65 | int hi = value - lo; | ||
66 | - assert(hi + lo == value); | ||
67 | + if (hi + lo != value) { | ||
68 | + return false; | 51 | + return false; |
69 | + } | 52 | + } |
70 | code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16); | 53 | + } else if (arg < TCG_REG_V0) { |
71 | code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo); | 54 | + if (have_isa_2_07) { |
55 | + tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD) | ||
56 | + | VRT(ret) | RA(arg)); | ||
57 | + break; | ||
58 | + } else { | ||
59 | + /* Altivec does not support integer->vector moves. */ | ||
60 | + return false; | ||
61 | + } | ||
72 | } | 62 | } |
73 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | 63 | /* fallthru */ |
74 | TCGMemOp opc = get_memop(oi); | 64 | case TCG_TYPE_V64: |
75 | TCGReg hi, lo, arg = TCG_REG_R3; | ||
76 | |||
77 | - reloc_pc14(lb->label_ptr[0], s->code_ptr); | ||
78 | + **lb->label_ptr |= reloc_pc14_val(*lb->label_ptr, s->code_ptr); | ||
79 | |||
80 | tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); | ||
81 | |||
82 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
83 | TCGMemOp s_bits = opc & MO_SIZE; | ||
84 | TCGReg hi, lo, arg = TCG_REG_R3; | ||
85 | |||
86 | - reloc_pc14(lb->label_ptr[0], s->code_ptr); | ||
87 | + **lb->label_ptr |= reloc_pc14_val(*lb->label_ptr, s->code_ptr); | ||
88 | |||
89 | tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0); | ||
90 | |||
91 | -- | 65 | -- |
92 | 2.17.2 | 66 | 2.17.1 |
93 | 67 | ||
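The reloc_pc14 change on the left is the core of the ppc conversion: check the displacement, patch only on success. A stand-alone sketch of that encoding (names hypothetical; the low two bits of the field are the AA/LK flags, so they are masked off rather than stored):

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* PPC conditional branch: signed 16-bit byte displacement, with the
 * low two bits of the instruction reserved for the AA/LK flags. */
static bool reloc_pc14_sketch(uint32_t *pc, const uint32_t *target)
{
    ptrdiff_t disp = (const char *)target - (const char *)pc;

    if (disp == (int16_t)disp) {
        *pc = (*pc & ~0xfffcu) | ((uint32_t)disp & 0xfffc);
        return true;
    }
    return false;   /* caller asserts or retries, as in the slow paths */
}
```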
1 | Reviewed-by: Emilio G. Cota <cota@braap.org> | 1 | These new instructions are conditional only on MSR.VEC and |
---|---|---|---|
2 | are thus part of the Altivec instruction set, and not VSX. | ||
3 | This includes negation and compare not equal. | ||
4 | |||
5 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
3 | --- | 7 | --- |
4 | tcg/i386/tcg-target.inc.c | 9 +++++++++ | 8 | tcg/ppc/tcg-target.h | 2 +- |
5 | 1 file changed, 9 insertions(+) | 9 | tcg/ppc/tcg-target.inc.c | 23 +++++++++++++++++++++++ |
10 | 2 files changed, 24 insertions(+), 1 deletion(-) | ||
6 | 11 | ||
7 | diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c | 12 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h |
8 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
9 | --- a/tcg/i386/tcg-target.inc.c | 14 | --- a/tcg/ppc/tcg-target.h |
10 | +++ b/tcg/i386/tcg-target.inc.c | 15 | +++ b/tcg/ppc/tcg-target.h |
11 | @@ -XXX,XX +XXX,XX @@ static inline int setup_guest_base_seg(void) | 16 | @@ -XXX,XX +XXX,XX @@ extern bool have_vsx; |
12 | } | 17 | #define TCG_TARGET_HAS_andc_vec 1 |
13 | return 0; | 18 | #define TCG_TARGET_HAS_orc_vec have_isa_2_07 |
14 | } | 19 | #define TCG_TARGET_HAS_not_vec 1 |
15 | +# elif defined (__FreeBSD__) || defined (__FreeBSD_kernel__) | 20 | -#define TCG_TARGET_HAS_neg_vec 0 |
16 | +# include <machine/sysarch.h> | 21 | +#define TCG_TARGET_HAS_neg_vec have_isa_3_00 |
17 | +static inline int setup_guest_base_seg(void) | 22 | #define TCG_TARGET_HAS_abs_vec 0 |
18 | +{ | 23 | #define TCG_TARGET_HAS_shi_vec 0 |
19 | + if (sysarch(AMD64_SET_GSBASE, &guest_base) == 0) { | 24 | #define TCG_TARGET_HAS_shs_vec 0 |
20 | + return P_GS; | 25 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c |
21 | + } | 26 | index XXXXXXX..XXXXXXX 100644 |
22 | + return 0; | 27 | --- a/tcg/ppc/tcg-target.inc.c |
23 | +} | 28 | +++ b/tcg/ppc/tcg-target.inc.c |
24 | # else | 29 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, |
25 | static inline int setup_guest_base_seg(void) | 30 | #define VSUBUWM VX4(1152) |
26 | { | 31 | #define VSUBUDM VX4(1216) /* v2.07 */ |
32 | |||
33 | +#define VNEGW (VX4(1538) | (6 << 16)) /* v3.00 */ | ||
34 | +#define VNEGD (VX4(1538) | (7 << 16)) /* v3.00 */ | ||
35 | + | ||
36 | #define VMAXSB VX4(258) | ||
37 | #define VMAXSH VX4(322) | ||
38 | #define VMAXSW VX4(386) | ||
39 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
40 | #define VCMPGTUH VX4(582) | ||
41 | #define VCMPGTUW VX4(646) | ||
42 | #define VCMPGTUD VX4(711) /* v2.07 */ | ||
43 | +#define VCMPNEB VX4(7) /* v3.00 */ | ||
44 | +#define VCMPNEH VX4(71) /* v3.00 */ | ||
45 | +#define VCMPNEW VX4(135) /* v3.00 */ | ||
46 | |||
47 | #define VSLB VX4(260) | ||
48 | #define VSLH VX4(324) | ||
49 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
50 | case INDEX_op_shri_vec: | ||
51 | case INDEX_op_sari_vec: | ||
52 | return vece <= MO_32 || have_isa_2_07 ? -1 : 0; | ||
53 | + case INDEX_op_neg_vec: | ||
54 | + return vece >= MO_32 && have_isa_3_00; | ||
55 | case INDEX_op_mul_vec: | ||
56 | switch (vece) { | ||
57 | case MO_8: | ||
58 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
59 | static const uint32_t | ||
60 | add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, | ||
61 | sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, | ||
62 | + neg_op[4] = { 0, 0, VNEGW, VNEGD }, | ||
63 | eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, | ||
64 | + ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 }, | ||
65 | gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD }, | ||
66 | gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD }, | ||
67 | ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, | ||
68 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
69 | case INDEX_op_sub_vec: | ||
70 | insn = sub_op[vece]; | ||
71 | break; | ||
72 | + case INDEX_op_neg_vec: | ||
73 | + insn = neg_op[vece]; | ||
74 | + a2 = a1; | ||
75 | + a1 = 0; | ||
76 | + break; | ||
77 | case INDEX_op_mul_vec: | ||
78 | tcg_debug_assert(vece == MO_32 && have_isa_2_07); | ||
79 | insn = VMULUWM; | ||
80 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
81 | case TCG_COND_EQ: | ||
82 | insn = eq_op[vece]; | ||
83 | break; | ||
84 | + case TCG_COND_NE: | ||
85 | + insn = ne_op[vece]; | ||
86 | + break; | ||
87 | case TCG_COND_GT: | ||
88 | insn = gts_op[vece]; | ||
89 | break; | ||
90 | @@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | ||
91 | case TCG_COND_GTU: | ||
92 | break; | ||
93 | case TCG_COND_NE: | ||
94 | + if (have_isa_3_00 && vece <= MO_32) { | ||
95 | + break; | ||
96 | + } | ||
97 | + /* fall through */ | ||
98 | case TCG_COND_LE: | ||
99 | case TCG_COND_LEU: | ||
100 | need_inv = true; | ||
101 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
102 | case INDEX_op_dup2_vec: | ||
103 | return &v_v_v; | ||
104 | case INDEX_op_not_vec: | ||
105 | + case INDEX_op_neg_vec: | ||
106 | case INDEX_op_dup_vec: | ||
107 | return &v_v; | ||
108 | case INDEX_op_ld_vec: | ||
27 | -- | 109 | -- |
28 | 2.17.2 | 110 | 2.17.1 |
29 | 111 | ||
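One subtlety in the right-hand patch: VNEGW/VNEGD place a sub-opcode in the VRA field (the (6 << 16) and (7 << 16) in the defines), which is why tcg_out_vec_op shifts the source into a2 and zeroes a1 before the generic encode. A simplified sketch of the VX-form layout (macros condensed from the backend's own, so treat the details as illustrative):

```c
#include <stdint.h>

#define OPCD(opc) ((uint32_t)(opc) << 26)
#define VX4(opc)  (OPCD(4) | ((opc) & 0x7ff))   /* VX-form, major opcode 4 */
#define VRT(r)    ((uint32_t)(r) << 21)
#define VRA(r)    ((uint32_t)(r) << 16)
#define VRB(r)    ((uint32_t)(r) << 11)

#define VNEGW (VX4(1538) | (6 << 16))   /* sub-opcode 6 occupies VRA */

/* Generic three-register VX encode: for VNEGW the caller must pass
 * vra == 0 so the OR below leaves the embedded sub-opcode intact. */
static uint32_t encode_vx(uint32_t insn, int vrt, int vra, int vrb)
{
    return insn | VRT(vrt) | VRA(vra) | VRB(vrb);
}
```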
1 | This does require an extra two checks within the slow paths | 1 | These new instructions are a mix of those like LXSD that are |
---|---|---|---|
2 | to replace the assert that we're moving. Also add two checks | 2 | only conditional on MSR.VEC and those like LXV that are |
3 | within existing functions that lacked any kind of assert for | 3 | conditional on MSR.VEC for TX=1. Thus, in the end, we can |
4 | an out-of-range branch. | 4 | consider all of these as Altivec instructions. |
5 | 5 | ||
6 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 6 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | --- | 8 | --- |
9 | tcg/s390/tcg-target.inc.c | 34 +++++++++++++++++++++++----------- | 9 | tcg/ppc/tcg-target.inc.c | 47 ++++++++++++++++++++++++++++++++-------- |
10 | 1 file changed, 23 insertions(+), 11 deletions(-) | 10 | 1 file changed, 38 insertions(+), 9 deletions(-) |
11 | 11 | ||
12 | diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c | 12 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c |
13 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/tcg/s390/tcg-target.inc.c | 14 | --- a/tcg/ppc/tcg-target.inc.c |
15 | +++ b/tcg/s390/tcg-target.inc.c | 15 | +++ b/tcg/ppc/tcg-target.inc.c |
16 | @@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | 16 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, |
17 | 17 | #define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */ | |
18 | switch (type) { | 18 | #define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */ |
19 | case R_390_PC16DBL: | 19 | #define LXSIWZX (XO31(12) | 1) /* v2.07, force tx=1 */ |
20 | - assert(pcrel2 == (int16_t)pcrel2); | 20 | +#define LXV (OPCD(61) | 8 | 1) /* v3.00, force tx=1 */ |
21 | - tcg_patch16(code_ptr, pcrel2); | 21 | +#define LXSD (OPCD(57) | 2) /* v3.00 */ |
22 | + if (pcrel2 == (int16_t)pcrel2) { | 22 | +#define LXVWSX (XO31(364) | 1) /* v3.00, force tx=1 */ |
23 | + tcg_patch16(code_ptr, pcrel2); | 23 | |
24 | + return true; | 24 | #define STVX XO31(231) |
25 | + } | 25 | #define STVEWX XO31(199) |
26 | #define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */ | ||
27 | #define STXSIWX (XO31(140) | 1) /* v2.07, force sx=1 */ | ||
28 | +#define STXV (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */ | ||
29 | +#define STXSD (OPCD(61) | 2) /* v3.00 */ | ||
30 | |||
31 | #define VADDSBS VX4(768) | ||
32 | #define VADDUBS VX4(512) | ||
33 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | ||
34 | TCGReg base, tcg_target_long offset) | ||
35 | { | ||
36 | tcg_target_long orig = offset, l0, l1, extra = 0, align = 0; | ||
37 | - bool is_store = false; | ||
38 | + bool is_int_store = false; | ||
39 | TCGReg rs = TCG_REG_TMP1; | ||
40 | |||
41 | switch (opi) { | ||
42 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | ||
43 | break; | ||
44 | } | ||
26 | break; | 45 | break; |
27 | case R_390_PC32DBL: | 46 | + case LXSD: |
28 | - assert(pcrel2 == (int32_t)pcrel2); | 47 | + case STXSD: |
29 | - tcg_patch32(code_ptr, pcrel2); | 48 | + align = 3; |
30 | + if (pcrel2 == (int32_t)pcrel2) { | 49 | + break; |
31 | + tcg_patch32(code_ptr, pcrel2); | 50 | + case LXV: |
32 | + return true; | 51 | + case STXV: |
33 | + } | 52 | + align = 15; |
53 | + break; | ||
54 | case STD: | ||
55 | align = 3; | ||
56 | /* FALLTHRU */ | ||
57 | case STB: case STH: case STW: | ||
58 | - is_store = true; | ||
59 | + is_int_store = true; | ||
34 | break; | 60 | break; |
35 | case R_390_20: | 61 | } |
36 | - assert(value == sextract64(value, 0, 20)); | 62 | |
37 | - old = *(uint32_t *)code_ptr & 0xf00000ff; | 63 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, |
38 | - old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4); | 64 | if (rs == base) { |
39 | - tcg_patch32(code_ptr, old); | 65 | rs = TCG_REG_R0; |
40 | + if (value == sextract64(value, 0, 20)) { | 66 | } |
41 | + old = *(uint32_t *)code_ptr & 0xf00000ff; | 67 | - tcg_debug_assert(!is_store || rs != rt); |
42 | + old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4); | 68 | + tcg_debug_assert(!is_int_store || rs != rt); |
43 | + tcg_patch32(code_ptr, old); | 69 | tcg_out_movi(s, TCG_TYPE_PTR, rs, orig); |
44 | + return true; | 70 | tcg_out32(s, opx | TAB(rt & 31, base, rs)); |
45 | + } | 71 | return; |
72 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | ||
73 | case TCG_TYPE_V64: | ||
74 | tcg_debug_assert(ret >= TCG_REG_V0); | ||
75 | if (have_vsx) { | ||
76 | - tcg_out_mem_long(s, 0, LXSDX, ret, base, offset); | ||
77 | + tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX, | ||
78 | + ret, base, offset); | ||
79 | break; | ||
80 | } | ||
81 | tcg_debug_assert((offset & 7) == 0); | ||
82 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | ||
83 | case TCG_TYPE_V128: | ||
84 | tcg_debug_assert(ret >= TCG_REG_V0); | ||
85 | tcg_debug_assert((offset & 15) == 0); | ||
86 | - tcg_out_mem_long(s, 0, LVX, ret, base, offset); | ||
87 | + tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0, | ||
88 | + LVX, ret, base, offset); | ||
46 | break; | 89 | break; |
47 | default: | 90 | default: |
48 | g_assert_not_reached(); | 91 | g_assert_not_reached(); |
49 | } | 92 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, |
50 | - return true; | 93 | case TCG_TYPE_V64: |
51 | + return false; | 94 | tcg_debug_assert(arg >= TCG_REG_V0); |
52 | } | 95 | if (have_vsx) { |
53 | 96 | - tcg_out_mem_long(s, 0, STXSDX, arg, base, offset); | |
54 | /* parse target specific constraints */ | 97 | + tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0, |
55 | @@ -XXX,XX +XXX,XX @@ static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc, | 98 | + STXSDX, arg, base, offset); |
56 | 99 | break; | |
57 | if (l->has_value) { | 100 | } |
58 | off = l->u.value_ptr - s->code_ptr; | 101 | tcg_debug_assert((offset & 7) == 0); |
59 | + tcg_debug_assert(off == (int16_t)off); | 102 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, |
60 | } else { | 103 | break; |
61 | tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2); | 104 | case TCG_TYPE_V128: |
62 | } | 105 | tcg_debug_assert(arg >= TCG_REG_V0); |
63 | @@ -XXX,XX +XXX,XX @@ static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc, | 106 | - tcg_out_mem_long(s, 0, STVX, arg, base, offset); |
64 | 107 | + tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0, | |
65 | if (l->has_value) { | 108 | + STVX, arg, base, offset); |
66 | off = l->u.value_ptr - s->code_ptr; | 109 | break; |
67 | + tcg_debug_assert(off == (int16_t)off); | 110 | default: |
68 | } else { | 111 | g_assert_not_reached(); |
69 | tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2); | 112 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, |
70 | } | 113 | tcg_debug_assert(out >= TCG_REG_V0); |
71 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | 114 | switch (vece) { |
72 | TCGMemOpIdx oi = lb->oi; | 115 | case MO_8: |
73 | TCGMemOp opc = get_memop(oi); | 116 | - tcg_out_mem_long(s, 0, LVEBX, out, base, offset); |
74 | 117 | + if (have_isa_3_00) { | |
75 | - patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, 2); | 118 | + tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16); |
76 | + bool ok = patch_reloc(lb->label_ptr[0], R_390_PC16DBL, | 119 | + } else { |
77 | + (intptr_t)s->code_ptr, 2); | 120 | + tcg_out_mem_long(s, 0, LVEBX, out, base, offset); |
78 | + tcg_debug_assert(ok); | 121 | + } |
79 | 122 | elt = extract32(offset, 0, 4); | |
80 | tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); | 123 | #ifndef HOST_WORDS_BIGENDIAN |
81 | if (TARGET_LONG_BITS == 64) { | 124 | elt ^= 15; |
82 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | 125 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, |
83 | TCGMemOpIdx oi = lb->oi; | 126 | break; |
84 | TCGMemOp opc = get_memop(oi); | 127 | case MO_16: |
85 | 128 | tcg_debug_assert((offset & 1) == 0); | |
86 | - patch_reloc(lb->label_ptr[0], R_390_PC16DBL, (intptr_t)s->code_ptr, 2); | 129 | - tcg_out_mem_long(s, 0, LVEHX, out, base, offset); |
87 | + bool ok = patch_reloc(lb->label_ptr[0], R_390_PC16DBL, | 130 | + if (have_isa_3_00) { |
88 | + (intptr_t)s->code_ptr, 2); | 131 | + tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16); |
89 | + tcg_debug_assert(ok); | 132 | + } else { |
90 | 133 | + tcg_out_mem_long(s, 0, LVEHX, out, base, offset); | |
91 | tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0); | 134 | + } |
92 | if (TARGET_LONG_BITS == 64) { | 135 | elt = extract32(offset, 1, 3); |
136 | #ifndef HOST_WORDS_BIGENDIAN | ||
137 | elt ^= 7; | ||
138 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, | ||
139 | tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16)); | ||
140 | break; | ||
141 | case MO_32: | ||
142 | + if (have_isa_3_00) { | ||
143 | + tcg_out_mem_long(s, 0, LXVWSX, out, base, offset); | ||
144 | + break; | ||
145 | + } | ||
146 | tcg_debug_assert((offset & 3) == 0); | ||
147 | tcg_out_mem_long(s, 0, LVEWX, out, base, offset); | ||
148 | elt = extract32(offset, 2, 2); | ||
93 | -- | 149 | -- |
94 | 2.17.2 | 150 | 2.17.1 |
95 | 151 | ||
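The R_390_20 branch on the left scatters a signed 20-bit displacement into the split DL/DH fields of s390's long-displacement format. Pulled out of context (hypothetical helper), the check-then-patch reads:

```c
#include <stdbool.h>
#include <stdint.h>

/* Long-displacement format: DL (low 12 bits) sits at bit 16 and
 * DH (high 8 bits) at bit 8 of the instruction's low word. */
static bool patch_disp20(uint32_t *low_word, int64_t value)
{
    if (value < -0x80000 || value > 0x7ffff) {
        return false;               /* does not fit in 20 signed bits */
    }
    uint32_t old = *low_word & 0xf00000ff;
    old |= ((uint32_t)(value & 0xfff) << 16)
         | ((uint32_t)(value & 0xff000) >> 4);
    *low_word = old;
    return true;
}
```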
1 | These values are constant between all qemu_ld/st invocations; | 1 | These new instructions are conditional on MSR.VEC for TX=1, |
---|---|---|---|
2 | there is no need to figure this out each time. If we cannot | 2 | so we can consider these Altivec instructions. |
3 | use a segment or an offset directly for guest_base, load the | ||
4 | value into a register in the prologue. | ||
5 | 3 | ||
6 | Reviewed-by: Emilio G. Cota <cota@braap.org> | 4 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | --- | 6 | --- |
9 | tcg/i386/tcg-target.inc.c | 101 +++++++++++++++----------------------- | 7 | tcg/ppc/tcg-target.inc.c | 28 ++++++++++++++++++++++++++-- |
10 | 1 file changed, 40 insertions(+), 61 deletions(-) | 8 | 1 file changed, 26 insertions(+), 2 deletions(-) |
11 | 9 | ||
12 | diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c | 10 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c |
13 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/tcg/i386/tcg-target.inc.c | 12 | --- a/tcg/ppc/tcg-target.inc.c |
15 | +++ b/tcg/i386/tcg-target.inc.c | 13 | +++ b/tcg/ppc/tcg-target.inc.c |
16 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) | 14 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, |
17 | tcg_out_push(s, retaddr); | 15 | |
18 | tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); | 16 | #define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */ |
19 | } | 17 | #define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */ |
20 | -#elif defined(__x86_64__) && defined(__linux__) | 18 | +#define XXSPLTIB (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */ |
21 | -# include <asm/prctl.h> | 19 | |
22 | -# include <sys/prctl.h> | 20 | #define MFVSRD (XO31(51) | 1) /* v2.07, force sx=1 */ |
23 | - | 21 | #define MFVSRWZ (XO31(115) | 1) /* v2.07, force sx=1 */ |
24 | +#elif TCG_TARGET_REG_BITS == 32 | 22 | #define MTVSRD (XO31(179) | 1) /* v2.07, force tx=1 */ |
25 | +# define x86_guest_base_seg 0 | 23 | #define MTVSRWZ (XO31(243) | 1) /* v2.07, force tx=1 */ |
26 | +# define x86_guest_base_index -1 | 24 | +#define MTVSRDD (XO31(435) | 1) /* v3.00, force tx=1 */ |
27 | +# define x86_guest_base_offset guest_base | 25 | +#define MTVSRWS (XO31(403) | 1) /* v3.00, force tx=1 */ |
28 | +#else | 26 | |
29 | +static int x86_guest_base_seg; | 27 | #define RT(r) ((r)<<21) |
30 | +static int x86_guest_base_index = -1; | 28 | #define RS(r) ((r)<<21) |
31 | +static int32_t x86_guest_base_offset; | 29 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, |
32 | +# if defined(__x86_64__) && defined(__linux__) | 30 | return; |
33 | +# include <asm/prctl.h> | 31 | } |
34 | +# include <sys/prctl.h> | 32 | } |
35 | int arch_prctl(int code, unsigned long addr); | 33 | + if (have_isa_3_00 && val == (tcg_target_long)dup_const(MO_8, val)) { |
36 | - | 34 | + tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11)); |
37 | -static int guest_base_flags; | 35 | + return; |
38 | -static inline void setup_guest_base_seg(void) | 36 | + } |
39 | +static inline int setup_guest_base_seg(void) | 37 | |
38 | /* | ||
39 | * Otherwise we must load the value from the constant pool. | ||
40 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, | ||
41 | TCGReg dst, TCGReg src) | ||
40 | { | 42 | { |
41 | if (arch_prctl(ARCH_SET_GS, guest_base) == 0) { | 43 | tcg_debug_assert(dst >= TCG_REG_V0); |
42 | - guest_base_flags = P_GS; | 44 | - tcg_debug_assert(src >= TCG_REG_V0); |
43 | + return P_GS; | 45 | + |
44 | } | 46 | + /* Splat from integer reg allowed via constraints for v3.00. */ |
45 | + return 0; | 47 | + if (src < TCG_REG_V0) { |
46 | } | 48 | + tcg_debug_assert(have_isa_3_00); |
47 | -#else | 49 | + switch (vece) { |
48 | -# define guest_base_flags 0 | 50 | + case MO_64: |
49 | -static inline void setup_guest_base_seg(void) { } | 51 | + tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src)); |
50 | +# else | 52 | + return true; |
51 | +static inline int setup_guest_base_seg(void) | 53 | + case MO_32: |
52 | +{ | 54 | + tcg_out32(s, MTVSRWS | VRT(dst) | RA(src)); |
53 | + return 0; | 55 | + return true; |
54 | +} | 56 | + default: |
55 | +# endif | 57 | + /* Fail, so that we fall back on either dupm or mov+dup. */ |
56 | #endif /* SOFTMMU */ | 58 | + return false; |
57 | |||
58 | static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, | ||
59 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) | ||
60 | add_qemu_ldst_label(s, true, is64, oi, datalo, datahi, addrlo, addrhi, | ||
61 | s->code_ptr, label_ptr); | ||
62 | #else | ||
63 | - { | ||
64 | - int32_t offset = guest_base; | ||
65 | - int index = -1; | ||
66 | - int seg = 0; | ||
67 | - | ||
68 | - /* | ||
69 | - * Recall we store 32-bit values zero-extended. No need for | ||
70 | - * further manual extension or an addr32 (0x67) prefix. | ||
71 | - */ | ||
72 | - if (guest_base == 0 || guest_base_flags) { | ||
73 | - seg = guest_base_flags; | ||
74 | - offset = 0; | ||
75 | - } else if (TCG_TARGET_REG_BITS == 64 && offset != guest_base) { | ||
76 | - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); | ||
77 | - index = TCG_REG_L1; | ||
78 | - offset = 0; | ||
79 | - } | ||
80 | - | ||
81 | - tcg_out_qemu_ld_direct(s, datalo, datahi, | ||
82 | - addrlo, index, offset, seg, is64, opc); | ||
83 | - } | ||
84 | + tcg_out_qemu_ld_direct(s, datalo, datahi, addrlo, x86_guest_base_index, | ||
85 | + x86_guest_base_offset, x86_guest_base_seg, | ||
86 | + is64, opc); | ||
87 | #endif | ||
88 | } | ||
89 | |||
90 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) | ||
91 | add_qemu_ldst_label(s, false, is64, oi, datalo, datahi, addrlo, addrhi, | ||
92 | s->code_ptr, label_ptr); | ||
93 | #else | ||
94 | - { | ||
95 | - int32_t offset = guest_base; | ||
96 | - int index = -1; | ||
97 | - int seg = 0; | ||
98 | - | ||
99 | - /* | ||
100 | - * Recall we store 32-bit values zero-extended. No need for | ||
101 | - * further manual extension or an addr32 (0x67) prefix. | ||
102 | - */ | ||
103 | - if (guest_base == 0 || guest_base_flags) { | ||
104 | - seg = guest_base_flags; | ||
105 | - offset = 0; | ||
106 | - } else if (TCG_TARGET_REG_BITS == 64 && offset != guest_base) { | ||
107 | - /* ??? Note that we require L0 free for bswap. */ | ||
108 | - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); | ||
109 | - index = TCG_REG_L1; | ||
110 | - offset = 0; | ||
111 | - } | ||
112 | - | ||
113 | - tcg_out_qemu_st_direct(s, datalo, datahi, | ||
114 | - addrlo, index, offset, seg, opc); | ||
115 | - } | ||
116 | + tcg_out_qemu_st_direct(s, datalo, datahi, addrlo, x86_guest_base_index, | ||
117 | + x86_guest_base_offset, x86_guest_base_seg, opc); | ||
118 | #endif | ||
119 | } | ||
120 | |||
121 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s) | ||
122 | (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4 | ||
123 | + stack_addend); | ||
124 | #else | ||
125 | +# if !defined(CONFIG_SOFTMMU) && TCG_TARGET_REG_BITS == 64 | ||
126 | + if (guest_base) { | ||
127 | + int seg = setup_guest_base_seg(); | ||
128 | + if (seg != 0) { | ||
129 | + x86_guest_base_seg = seg; | ||
130 | + } else if (guest_base == (int32_t)guest_base) { | ||
131 | + x86_guest_base_offset = guest_base; | ||
132 | + } else { | ||
133 | + /* Choose R12 because, as a base, it requires a SIB byte. */ | ||
134 | + x86_guest_base_index = TCG_REG_R12; | ||
135 | + tcg_out_movi(s, TCG_TYPE_PTR, x86_guest_base_index, guest_base); |
136 | + tcg_regset_set_reg(s->reserved_regs, x86_guest_base_index); | ||
137 | + } | 59 | + } |
138 | + } | 60 | + } |
139 | +# endif | 61 | |
140 | tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); | 62 | /* |
141 | tcg_out_addi(s, TCG_REG_ESP, -stack_addend); | 63 | * Recall we use (or emulate) VSX integer loads, so the integer is |
142 | /* jmp *tb. */ | 64 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) |
143 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s) | 65 | static const TCGTargetOpDef sub2 |
144 | tcg_out_pop(s, tcg_target_callee_save_regs[i]); | 66 | = { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } }; |
145 | } | 67 | static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } }; |
146 | tcg_out_opc(s, OPC_RET, 0, 0, 0); | 68 | + static const TCGTargetOpDef v_vr = { .args_ct_str = { "v", "vr" } }; |
147 | - | 69 | static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } }; |
148 | -#if !defined(CONFIG_SOFTMMU) | 70 | static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } }; |
149 | - /* Try to set up a segment register to point to guest_base. */ | 71 | static const TCGTargetOpDef v_v_v_v |
150 | - if (guest_base) { | 72 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) |
151 | - setup_guest_base_seg(); | 73 | return &v_v_v; |
152 | - } | 74 | case INDEX_op_not_vec: |
153 | -#endif | 75 | case INDEX_op_neg_vec: |
154 | } | 76 | - case INDEX_op_dup_vec: |
155 | 77 | return &v_v; | |
156 | static void tcg_out_nop_fill(tcg_insn_unit *p, int count) | 78 | + case INDEX_op_dup_vec: |
79 | + return have_isa_3_00 ? &v_vr : &v_v; | ||
80 | case INDEX_op_ld_vec: | ||
81 | case INDEX_op_st_vec: | ||
82 | case INDEX_op_dupm_vec: | ||
157 | -- | 83 | -- |
158 | 2.17.2 | 84 | 2.17.1 |
159 | 85 | ||
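The XXSPLTIB fast path in tcg_out_dupi_vec fires when the constant equals dup_const(MO_8, val), i.e. when all eight bytes of the value are identical. That test can be stated in one line (hypothetical helper):

```c
#include <stdbool.h>
#include <stdint.h>

/* dup_const(MO_8, v) replicates the low byte into every byte lane;
 * a value is XXSPLTIB-encodable iff it equals that replication. */
static bool is_byte_splat(uint64_t val)
{
    return val == (val & 0xff) * 0x0101010101010101ull;
}
```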
1 | From: "Emilio G. Cota" <cota@braap.org> | 1 | From: Alex Bennée <alex.bennee@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Before moving them all to include/qemu/xxhash.h. | 3 | qemu_cpu_kick is used for a number of reasons including to indicate |
4 | there is work to be done. However, when thread=single the old |
5 | qemu_cpu_kick_rr_cpu only advanced the vCPU to the next executing one | ||
6 | which can lead to a hang in the case that: | ||
4 | 7 | ||
5 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 8 | a) the kick is from outside the vCPUs (e.g. iothread) |
6 | Signed-off-by: Emilio G. Cota <cota@braap.org> | 9 | b) the timers are paused (i.e. iothread calling run_on_cpu) |
10 | |||
11 | To avoid this lets split qemu_cpu_kick_rr into two functions. One for | ||
12 | the timer which continues to advance to the next timeslice and another | ||
13 | for all other kicks. | ||
14 | |||
15 | Message-Id: <20191001160426.26644-1-alex.bennee@linaro.org> | ||
16 | Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> | ||
17 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
18 | Signed-off-by: Alex Bennée <alex.bennee@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 19 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | --- | 20 | --- |
9 | include/exec/tb-hash-xx.h | 41 +++++++++++++++++++++++++++++---------- | 21 | cpus.c | 24 ++++++++++++++++++------ |
10 | include/exec/tb-hash.h | 2 +- | 22 | 1 file changed, 18 insertions(+), 6 deletions(-) |
11 | tests/qht-bench.c | 2 +- | ||
12 | util/qsp.c | 12 ++++++------ | ||
13 | 4 files changed, 39 insertions(+), 18 deletions(-) | ||
14 | 23 | ||
15 | diff --git a/include/exec/tb-hash-xx.h b/include/exec/tb-hash-xx.h | 24 | diff --git a/cpus.c b/cpus.c |
16 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/include/exec/tb-hash-xx.h | 26 | --- a/cpus.c |
18 | +++ b/include/exec/tb-hash-xx.h | 27 | +++ b/cpus.c |
19 | @@ -XXX,XX +XXX,XX @@ | 28 | @@ -XXX,XX +XXX,XX @@ static inline int64_t qemu_tcg_next_kick(void) |
20 | #define PRIME32_4 668265263U | 29 | return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD; |
21 | #define PRIME32_5 374761393U | 30 | } |
22 | 31 | ||
23 | -#define TB_HASH_XX_SEED 1 | 32 | -/* Kick the currently round-robin scheduled vCPU */ |
24 | +#define QEMU_XXHASH_SEED 1 | 33 | -static void qemu_cpu_kick_rr_cpu(void) |
25 | 34 | +/* Kick the currently round-robin scheduled vCPU to next */ | |
26 | /* | 35 | +static void qemu_cpu_kick_rr_next_cpu(void) |
27 | * xxhash32, customized for input variables that are not guaranteed to be | ||
28 | * contiguous in memory. | ||
29 | */ | ||
30 | static inline uint32_t | ||
31 | -tb_hash_func7(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f, uint32_t g) | ||
32 | +qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g) | ||
33 | { | 36 | { |
34 | - uint32_t v1 = TB_HASH_XX_SEED + PRIME32_1 + PRIME32_2; | 37 | CPUState *cpu; |
35 | - uint32_t v2 = TB_HASH_XX_SEED + PRIME32_2; | 38 | do { |
36 | - uint32_t v3 = TB_HASH_XX_SEED + 0; | 39 | @@ -XXX,XX +XXX,XX @@ static void qemu_cpu_kick_rr_cpu(void) |
37 | - uint32_t v4 = TB_HASH_XX_SEED - PRIME32_1; | 40 | } while (cpu != atomic_mb_read(&tcg_current_rr_cpu)); |
38 | - uint32_t a = a0 >> 32; | ||
39 | - uint32_t b = a0; | ||
40 | - uint32_t c = b0 >> 32; | ||
41 | - uint32_t d = b0; | ||
42 | + uint32_t v1 = QEMU_XXHASH_SEED + PRIME32_1 + PRIME32_2; | ||
43 | + uint32_t v2 = QEMU_XXHASH_SEED + PRIME32_2; | ||
44 | + uint32_t v3 = QEMU_XXHASH_SEED + 0; | ||
45 | + uint32_t v4 = QEMU_XXHASH_SEED - PRIME32_1; | ||
46 | + uint32_t a = ab >> 32; | ||
47 | + uint32_t b = ab; | ||
48 | + uint32_t c = cd >> 32; | ||
49 | + uint32_t d = cd; | ||
50 | uint32_t h32; | ||
51 | |||
52 | v1 += a * PRIME32_2; | ||
53 | @@ -XXX,XX +XXX,XX @@ tb_hash_func7(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f, uint32_t g) | ||
54 | return h32; | ||
55 | } | 41 | } |
56 | 42 | ||
57 | +static inline uint32_t qemu_xxhash2(uint64_t ab) | 43 | +/* Kick all RR vCPUs */ |
44 | +static void qemu_cpu_kick_rr_cpus(void) | ||
58 | +{ | 45 | +{ |
59 | + return qemu_xxhash7(ab, 0, 0, 0, 0); | 46 | + CPUState *cpu; |
47 | + | ||
48 | + CPU_FOREACH(cpu) { | ||
49 | + cpu_exit(cpu); | ||
50 | + }; | ||
60 | +} | 51 | +} |
61 | + | 52 | + |
62 | +static inline uint32_t qemu_xxhash4(uint64_t ab, uint64_t cd) | 53 | static void do_nothing(CPUState *cpu, run_on_cpu_data unused) |
63 | +{ | ||
64 | + return qemu_xxhash7(ab, cd, 0, 0, 0); | ||
65 | +} | ||
66 | + | ||
67 | +static inline uint32_t qemu_xxhash5(uint64_t ab, uint64_t cd, uint32_t e) | ||
68 | +{ | ||
69 | + return qemu_xxhash7(ab, cd, e, 0, 0); | ||
70 | +} | ||
71 | + | ||
72 | +static inline uint32_t qemu_xxhash6(uint64_t ab, uint64_t cd, uint32_t e, | ||
73 | + uint32_t f) | ||
74 | +{ | ||
75 | + return qemu_xxhash7(ab, cd, e, f, 0); | ||
76 | +} | ||
77 | + | ||
78 | #endif /* EXEC_TB_HASH_XX_H */ | ||
79 | diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h | ||
80 | index XXXXXXX..XXXXXXX 100644 | ||
81 | --- a/include/exec/tb-hash.h | ||
82 | +++ b/include/exec/tb-hash.h | ||
83 | @@ -XXX,XX +XXX,XX @@ static inline | ||
84 | uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags, | ||
85 | uint32_t cf_mask, uint32_t trace_vcpu_dstate) | ||
86 | { | 54 | { |
87 | - return tb_hash_func7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate); | ||
88 | + return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate); | ||
89 | } | 55 | } |
90 | 56 | @@ -XXX,XX +XXX,XX @@ void qemu_timer_notify_cb(void *opaque, QEMUClockType type) | |
91 | #endif | 57 | static void kick_tcg_thread(void *opaque) |
92 | diff --git a/tests/qht-bench.c b/tests/qht-bench.c | ||
93 | index XXXXXXX..XXXXXXX 100644 | ||
94 | --- a/tests/qht-bench.c | ||
95 | +++ b/tests/qht-bench.c | ||
96 | @@ -XXX,XX +XXX,XX @@ static bool is_equal(const void *ap, const void *bp) | ||
97 | |||
98 | static uint32_t h(unsigned long v) | ||
99 | { | 58 | { |
100 | - return tb_hash_func7(v, 0, 0, 0, 0); | 59 | timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick()); |
101 | + return qemu_xxhash2(v); | 60 | - qemu_cpu_kick_rr_cpu(); |
61 | + qemu_cpu_kick_rr_next_cpu(); | ||
102 | } | 62 | } |
103 | 63 | ||
104 | static uint32_t hval(unsigned long v) | 64 | static void start_tcg_kick_timer(void) |
105 | diff --git a/util/qsp.c b/util/qsp.c | 65 | @@ -XXX,XX +XXX,XX @@ void qemu_cpu_kick(CPUState *cpu) |
106 | index XXXXXXX..XXXXXXX 100644 | ||
107 | --- a/util/qsp.c | ||
108 | +++ b/util/qsp.c | ||
109 | @@ -XXX,XX +XXX,XX @@ QemuCondWaitFunc qemu_cond_wait_func = qemu_cond_wait_impl; | ||
110 | * without it we still get a pretty unique hash. | ||
111 | */ | ||
112 | static inline | ||
113 | -uint32_t do_qsp_callsite_hash(const QSPCallSite *callsite, uint64_t a) | ||
114 | +uint32_t do_qsp_callsite_hash(const QSPCallSite *callsite, uint64_t ab) | ||
115 | { | 66 | { |
116 | - uint64_t b = (uint64_t)(uintptr_t)callsite->obj; | 67 | qemu_cond_broadcast(cpu->halt_cond); |
117 | + uint64_t cd = (uint64_t)(uintptr_t)callsite->obj; | 68 | if (tcg_enabled()) { |
118 | uint32_t e = callsite->line; | 69 | - cpu_exit(cpu); |
119 | uint32_t f = callsite->type; | 70 | - /* NOP unless doing single-thread RR */ |
120 | 71 | - qemu_cpu_kick_rr_cpu(); | |
121 | - return tb_hash_func7(a, b, e, f, 0); | 72 | + if (qemu_tcg_mttcg_enabled()) { |
122 | + return qemu_xxhash6(ab, cd, e, f); | 73 | + cpu_exit(cpu); |
123 | } | 74 | + } else { |
124 | 75 | + qemu_cpu_kick_rr_cpus(); | |
125 | static inline | 76 | + } |
126 | @@ -XXX,XX +XXX,XX @@ static uint32_t qsp_entry_no_thread_hash(const QSPEntry *entry) | 77 | } else { |
127 | static uint32_t qsp_entry_no_thread_obj_hash(const QSPEntry *entry) | 78 | if (hax_enabled()) { |
128 | { | 79 | /* |
129 | const QSPCallSite *callsite = entry->callsite; | ||
130 | - uint64_t a = g_str_hash(callsite->file); | ||
131 | - uint64_t b = callsite->line; | ||
132 | + uint64_t ab = g_str_hash(callsite->file); | ||
133 | + uint64_t cd = callsite->line; | ||
134 | uint32_t e = callsite->type; | ||
135 | |||
136 | - return tb_hash_func7(a, b, e, 0, 0); | ||
137 | + return qemu_xxhash5(ab, cd, e); | ||
138 | } | ||
139 | |||
140 | static bool qsp_callsite_cmp(const void *ap, const void *bp) | ||
141 | -- | 80 | -- |
142 | 2.17.2 | 81 | 2.17.1 |
143 | 82 | ||
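On the xxhash side, the new qemu_xxhash{2,4,5,6} entry points are plain zero-padded calls into the seven-word worker, so shorter keys cost nothing extra once inlined. A toy demonstration with a stand-in mixer (the arithmetic below is made up for illustration, it is not xxhash):

```c
#include <assert.h>
#include <stdint.h>

/* Stand-in for the seven-word worker; the mixing is arbitrary. */
static uint32_t hash7(uint64_t ab, uint64_t cd, uint32_t e,
                      uint32_t f, uint32_t g)
{
    uint64_t h = (ab ^ 0x9e3779b97f4a7c15ull) * 0xff51afd7ed558ccdull;
    h ^= cd + e + ((uint64_t)f << 16) + ((uint64_t)g << 32);
    return (uint32_t)(h ^ (h >> 32));
}

static inline uint32_t hash2(uint64_t ab)
{
    return hash7(ab, 0, 0, 0, 0);
}

static inline uint32_t hash6(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f)
{
    return hash7(ab, cd, e, f, 0);
}

int main(void)
{
    assert(hash2(42) == hash7(42, 0, 0, 0, 0));
    assert(hash6(1, 2, 3, 4) == hash7(1, 2, 3, 4, 0));
    return 0;
}
```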
Deleted patch | |||
---|---|---|---|
1 | I didn't get this fix pushed back into the patch set that I actually | ||
2 | sent last week. The patch is in target-arm.next, and I'm sure you | ||
3 | would have eventually seen the error in testing. | ||
4 | 1 | ||
5 | |||
6 | r~ | ||
7 | --- | ||
8 | target/arm/kvm64.c | 4 ++-- | ||
9 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
10 | |||
11 | diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/target/arm/kvm64.c | ||
14 | +++ b/target/arm/kvm64.c | ||
15 | @@ -XXX,XX +XXX,XX @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) | ||
16 | ARM64_SYS_REG(3, 0, 0, 6, 0)); | ||
17 | err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1, | ||
18 | ARM64_SYS_REG(3, 0, 0, 6, 1)); | ||
19 | - err |= read_sys_reg64(fdarray[2], &achf->isar.id_aa64mmfr0, | ||
20 | + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0, | ||
21 | ARM64_SYS_REG(3, 0, 0, 7, 0)); | ||
22 | - err |= read_sys_reg64(fdarray[2], &achf->isar.id_aa64mmfr1, | ||
23 | + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1, | ||
24 | ARM64_SYS_REG(3, 0, 0, 7, 1)); | ||
25 | |||
26 | /* | ||
27 | -- | ||
28 | 2.17.2 | ||
29 | |||