1 | The following changes since commit 9e5319ca52a5b9e84d55ad9c36e2c0b317a122bb: | 1 | The following changes since commit 7fe6cb68117ac856e03c93d18aca09de015392b0: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging (2019-10-04 18:32:34 +0100) | 3 | Merge tag 'pull-target-arm-20230530-1' of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2023-05-30 08:02:05 -0700) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://github.com/rth7680/qemu.git tags/pull-tcg-20191013 | 7 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230530 |
8 | 8 | ||
9 | for you to fetch changes up to d2f86bba6931388e275e8eb4ccd1dbcc7cae6328: | 9 | for you to fetch changes up to 276d77de503e8f5f5cbd3f7d94302ca12d1d982e: |
10 | 10 | ||
11 | cpus: kick all vCPUs when running thread=single (2019-10-07 14:08:58 -0400) | 11 | tests/decode: Add tests for various named-field cases (2023-05-30 10:55:39 -0700) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Host vector support for tcg/ppc. | 14 | Improvements to 128-bit atomics: |
15 | Fix thread=single cpu kicking. | 15 | - Separate __int128_t type and arithmetic detection |
16 | - Support 128-bit load/store in backend for i386, aarch64, ppc64, s390x | ||
17 | - Accelerate atomics via host/include/ | ||
18 | Decodetree: | ||
19 | - Add named field syntax | ||
20 | - Move tests to meson | ||
16 | 21 | ||
17 | ---------------------------------------------------------------- | 22 | ---------------------------------------------------------------- |
18 | Alex Bennée (1): | 23 | Peter Maydell (5): |
19 | cpus: kick all vCPUs when running thread=single | 24 | docs: Document decodetree named field syntax |
25 | scripts/decodetree: Pass lvalue-formatter function to str_extract() | ||
26 | scripts/decodetree: Implement a topological sort | ||
27 | scripts/decodetree: Implement named field support | ||
28 | tests/decode: Add tests for various named-field cases | ||
20 | 29 | ||
21 | Richard Henderson (22): | 30 | Richard Henderson (22): |
22 | tcg/ppc: Introduce Altivec registers | 31 | tcg: Fix register move type in tcg_out_ld_helper_ret |
23 | tcg/ppc: Introduce macro VX4() | 32 | accel/tcg: Fix check for page writeability in load_atomic16_or_exit |
24 | tcg/ppc: Introduce macros VRT(), VRA(), VRB(), VRC() | 33 | meson: Split test for __int128_t type from __int128_t arithmetic |
25 | tcg/ppc: Create TCGPowerISA and have_isa | 34 | qemu/atomic128: Add x86_64 atomic128-ldst.h |
26 | tcg/ppc: Replace HAVE_ISA_2_06 | 35 | tcg/i386: Support 128-bit load/store |
27 | tcg/ppc: Replace HAVE_ISEL macro with a variable | 36 | tcg/aarch64: Rename temporaries |
28 | tcg/ppc: Enable tcg backend vector compilation | 37 | tcg/aarch64: Reserve TCG_REG_TMP1, TCG_REG_TMP2 |
29 | tcg/ppc: Add support for load/store/logic/comparison | 38 | tcg/aarch64: Simplify constraints on qemu_ld/st |
30 | tcg/ppc: Add support for vector maximum/minimum | 39 | tcg/aarch64: Support 128-bit load/store |
31 | tcg/ppc: Add support for vector add/subtract | 40 | tcg/ppc: Support 128-bit load/store |
32 | tcg/ppc: Add support for vector saturated add/subtract | 41 | tcg/s390x: Support 128-bit load/store |
33 | tcg/ppc: Support vector shift by immediate | 42 | accel/tcg: Extract load_atom_extract_al16_or_al8 to host header |
34 | tcg/ppc: Support vector multiply | 43 | accel/tcg: Extract store_atom_insert_al16 to host header |
35 | tcg/ppc: Support vector dup2 | 44 | accel/tcg: Add x86_64 load_atom_extract_al16_or_al8 |
36 | tcg/ppc: Enable Altivec detection | 45 | accel/tcg: Add aarch64 lse2 load_atom_extract_al16_or_al8 |
37 | tcg/ppc: Update vector support for VSX | 46 | accel/tcg: Add aarch64 store_atom_insert_al16 |
38 | tcg/ppc: Update vector support for v2.07 Altivec | 47 | tcg: Remove TCG_TARGET_TLB_DISPLACEMENT_BITS |
39 | tcg/ppc: Update vector support for v2.07 VSX | 48 | decodetree: Add --test-for-error |
40 | tcg/ppc: Update vector support for v2.07 FP | 49 | decodetree: Fix recursion in prop_format and build_tree |
41 | tcg/ppc: Update vector support for v3.00 Altivec | 50 | decodetree: Diagnose empty pattern group |
42 | tcg/ppc: Update vector support for v3.00 load/store | 51 | decodetree: Do not remove output_file from /dev |
43 | tcg/ppc: Update vector support for v3.00 dup/dupi | 52 | tests/decode: Convert tests to meson |
44 | 53 | ||
45 | tcg/ppc/tcg-target.h | 51 ++- | 54 | docs/devel/decodetree.rst | 33 ++- |
46 | tcg/ppc/tcg-target.opc.h | 13 + | 55 | meson.build | 15 +- |
47 | cpus.c | 24 +- | 56 | host/include/aarch64/host/load-extract-al16-al8.h | 40 ++++ |
48 | tcg/ppc/tcg-target.inc.c | 1118 ++++++++++++++++++++++++++++++++++++++++++---- | 57 | host/include/aarch64/host/store-insert-al16.h | 47 ++++ |
49 | 4 files changed, 1119 insertions(+), 87 deletions(-) | 58 | host/include/generic/host/load-extract-al16-al8.h | 45 ++++ |
50 | create mode 100644 tcg/ppc/tcg-target.opc.h | 59 | host/include/generic/host/store-insert-al16.h | 50 ++++ |
51 | 60 | host/include/x86_64/host/atomic128-ldst.h | 68 ++++++ | |
61 | host/include/x86_64/host/load-extract-al16-al8.h | 50 ++++ | ||
62 | include/qemu/int128.h | 4 +- | ||
63 | tcg/aarch64/tcg-target-con-set.h | 4 +- | ||
64 | tcg/aarch64/tcg-target-con-str.h | 1 - | ||
65 | tcg/aarch64/tcg-target.h | 12 +- | ||
66 | tcg/arm/tcg-target.h | 1 - | ||
67 | tcg/i386/tcg-target.h | 5 +- | ||
68 | tcg/mips/tcg-target.h | 1 - | ||
69 | tcg/ppc/tcg-target-con-set.h | 2 + | ||
70 | tcg/ppc/tcg-target-con-str.h | 1 + | ||
71 | tcg/ppc/tcg-target.h | 4 +- | ||
72 | tcg/riscv/tcg-target.h | 1 - | ||
73 | tcg/s390x/tcg-target-con-set.h | 2 + | ||
74 | tcg/s390x/tcg-target.h | 3 +- | ||
75 | tcg/sparc64/tcg-target.h | 1 - | ||
76 | tcg/tci/tcg-target.h | 1 - | ||
77 | tests/decode/err_field10.decode | 7 + | ||
78 | tests/decode/err_field7.decode | 7 + | ||
79 | tests/decode/err_field8.decode | 8 + | ||
80 | tests/decode/err_field9.decode | 14 ++ | ||
81 | tests/decode/succ_named_field.decode | 19 ++ | ||
82 | tcg/tcg.c | 4 +- | ||
83 | accel/tcg/ldst_atomicity.c.inc | 80 +------ | ||
84 | tcg/aarch64/tcg-target.c.inc | 243 +++++++++++++++----- | ||
85 | tcg/i386/tcg-target.c.inc | 191 +++++++++++++++- | ||
86 | tcg/ppc/tcg-target.c.inc | 108 ++++++++- | ||
87 | tcg/s390x/tcg-target.c.inc | 107 ++++++++- | ||
88 | scripts/decodetree.py | 265 ++++++++++++++++++++-- | ||
89 | tests/decode/check.sh | 24 -- | ||
90 | tests/decode/meson.build | 64 ++++++ | ||
91 | tests/meson.build | 5 +- | ||
92 | 38 files changed, 1312 insertions(+), 225 deletions(-) | ||
93 | create mode 100644 host/include/aarch64/host/load-extract-al16-al8.h | ||
94 | create mode 100644 host/include/aarch64/host/store-insert-al16.h | ||
95 | create mode 100644 host/include/generic/host/load-extract-al16-al8.h | ||
96 | create mode 100644 host/include/generic/host/store-insert-al16.h | ||
97 | create mode 100644 host/include/x86_64/host/atomic128-ldst.h | ||
98 | create mode 100644 host/include/x86_64/host/load-extract-al16-al8.h | ||
99 | create mode 100644 tests/decode/err_field10.decode | ||
100 | create mode 100644 tests/decode/err_field7.decode | ||
101 | create mode 100644 tests/decode/err_field8.decode | ||
102 | create mode 100644 tests/decode/err_field9.decode | ||
103 | create mode 100644 tests/decode/succ_named_field.decode | ||
104 | delete mode 100755 tests/decode/check.sh | ||
105 | create mode 100644 tests/decode/meson.build | ||
1 | This is only used for 32-bit hosts. | 1 | The first move was incorrectly using TCG_TYPE_I32 while the second |
---|---|---|---|
2 | move was correctly using TCG_TYPE_REG. This prevents a 64-bit host | ||
3 | from moving all 128 bits of the return value. |
2 | 4 | ||
5 | Fixes: ebebea53ef8 ("tcg: Support TCG_TYPE_I128 in tcg_out_{ld,st}_helper_{args,ret}") | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | 7 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
5 | --- | 8 | --- |
6 | tcg/ppc/tcg-target.inc.c | 9 +++++++++ | 9 | tcg/tcg.c | 4 ++-- |
7 | 1 file changed, 9 insertions(+) | 10 | 1 file changed, 2 insertions(+), 2 deletions(-) |
8 | 11 | ||
9 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 12 | diff --git a/tcg/tcg.c b/tcg/tcg.c |
10 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/tcg/ppc/tcg-target.inc.c | 14 | --- a/tcg/tcg.c |
12 | +++ b/tcg/ppc/tcg-target.inc.c | 15 | +++ b/tcg/tcg.c |
13 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | 16 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst, |
14 | } | 17 | mov[0].dst = ldst->datalo_reg; |
15 | break; | 18 | mov[0].src = |
16 | 19 | tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN); | |
17 | + case INDEX_op_dup2_vec: | 20 | - mov[0].dst_type = TCG_TYPE_I32; |
18 | + assert(TCG_TARGET_REG_BITS == 32); | 21 | - mov[0].src_type = TCG_TYPE_I32; |
19 | + /* With inputs a1 = xLxx, a2 = xHxx */ | 22 | + mov[0].dst_type = TCG_TYPE_REG; |
20 | + tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1)); /* a0 = xxHL */ | 23 | + mov[0].src_type = TCG_TYPE_REG; |
21 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8); /* tmp = HLxx */ | 24 | mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; |
22 | + tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8); /* a0 = HLHL */ | 25 | |
23 | + return; | 26 | mov[1].dst = ldst->datahi_reg; |
24 | + | ||
25 | case INDEX_op_ppc_mrgh_vec: | ||
26 | insn = mrgh_op[vece]; | ||
27 | break; | ||
28 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
29 | case INDEX_op_ppc_mulou_vec: | ||
30 | case INDEX_op_ppc_pkum_vec: | ||
31 | case INDEX_op_ppc_rotl_vec: | ||
32 | + case INDEX_op_dup2_vec: | ||
33 | return &v_v_v; | ||
34 | case INDEX_op_not_vec: | ||
35 | case INDEX_op_dup_vec: | ||
36 | -- | 27 | -- |
37 | 2.17.1 | 28 | 2.34.1 |
38 | |||
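For readers skimming past the hunk above, the failure mode is easier to see outside of TCG. A minimal standalone C sketch (invented values, plain C rather than QEMU code) of the bug: a 32-bit register move on a 64-bit host zero-extends and discards the upper half of the source register, so describing one half of the two-register 128-bit return as a 32-bit move loses data.

```c
#include <inttypes.h>
#include <stdio.h>

/* Model a register-to-register move: a 32-bit move on a 64-bit host
 * zero-extends, discarding bits 32..63 of the source register. */
static uint64_t move_reg(uint64_t src, int full_width)
{
    return full_width ? src : (uint32_t)src;
}

int main(void)
{
    /* A 128-bit return value arrives in two 64-bit host registers. */
    uint64_t lo = 0x0123456789abcdefull, hi = 0xfedcba9876543210ull;

    /* Buggy pairing: first half described as TCG_TYPE_I32, second as TCG_TYPE_REG. */
    printf("buggy: lo=%016" PRIx64 " hi=%016" PRIx64 "\n",
           move_reg(lo, 0), move_reg(hi, 1));

    /* Fixed pairing: both halves moved at full host register width. */
    printf("fixed: lo=%016" PRIx64 " hi=%016" PRIx64 "\n",
           move_reg(lo, 1), move_reg(hi, 1));
    return 0;
}
```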
1 | Introduce an enum to hold base < 2.06 < 3.00. Use macros to | 1 | PAGE_WRITE is current writability, as modified by TB protection; |
---|---|---|---|
2 | preserve the existing have_isa_2_06 and have_isa_3_00 predicates. | 2 | PAGE_WRITE_ORG is the original page writability. |
3 | 3 | ||
4 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | 4 | Fixes: cdfac37be0d ("accel/tcg: Honor atomicity of loads") |
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | --- | 7 | --- |
7 | tcg/ppc/tcg-target.h | 12 ++++++++++-- | 8 | accel/tcg/ldst_atomicity.c.inc | 4 ++-- |
8 | tcg/ppc/tcg-target.inc.c | 8 ++++---- | 9 | 1 file changed, 2 insertions(+), 2 deletions(-) |
9 | 2 files changed, 14 insertions(+), 6 deletions(-) | ||
10 | 10 | ||
11 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | 11 | diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc |
12 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/tcg/ppc/tcg-target.h | 13 | --- a/accel/tcg/ldst_atomicity.c.inc |
14 | +++ b/tcg/ppc/tcg-target.h | 14 | +++ b/accel/tcg/ldst_atomicity.c.inc |
15 | @@ -XXX,XX +XXX,XX @@ typedef enum { | 15 | @@ -XXX,XX +XXX,XX @@ static uint64_t load_atomic8_or_exit(CPUArchState *env, uintptr_t ra, void *pv) |
16 | TCG_AREG0 = TCG_REG_R27 | 16 | * another process, because the fallback start_exclusive solution |
17 | } TCGReg; | 17 | * provides no protection across processes. |
18 | 18 | */ | |
19 | -extern bool have_isa_2_06; | 19 | - if (!page_check_range(h2g(pv), 8, PAGE_WRITE)) { |
20 | -extern bool have_isa_3_00; | 20 | + if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) { |
21 | +typedef enum { | 21 | uint64_t *p = __builtin_assume_aligned(pv, 8); |
22 | + tcg_isa_base, | 22 | return *p; |
23 | + tcg_isa_2_06, | ||
24 | + tcg_isa_3_00, | ||
25 | +} TCGPowerISA; | ||
26 | + | ||
27 | +extern TCGPowerISA have_isa; | ||
28 | + | ||
29 | +#define have_isa_2_06 (have_isa >= tcg_isa_2_06) | ||
30 | +#define have_isa_3_00 (have_isa >= tcg_isa_3_00) | ||
31 | |||
32 | /* optional instructions automatically implemented */ | ||
33 | #define TCG_TARGET_HAS_ext8u_i32 0 /* andi */ | ||
34 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/tcg/ppc/tcg-target.inc.c | ||
37 | +++ b/tcg/ppc/tcg-target.inc.c | ||
38 | @@ -XXX,XX +XXX,XX @@ | ||
39 | |||
40 | static tcg_insn_unit *tb_ret_addr; | ||
41 | |||
42 | -bool have_isa_2_06; | ||
43 | -bool have_isa_3_00; | ||
44 | +TCGPowerISA have_isa; | ||
45 | |||
46 | #define HAVE_ISA_2_06 have_isa_2_06 | ||
47 | #define HAVE_ISEL have_isa_2_06 | ||
48 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
49 | unsigned long hwcap = qemu_getauxval(AT_HWCAP); | ||
50 | unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2); | ||
51 | |||
52 | + have_isa = tcg_isa_base; | ||
53 | if (hwcap & PPC_FEATURE_ARCH_2_06) { | ||
54 | - have_isa_2_06 = true; | ||
55 | + have_isa = tcg_isa_2_06; | ||
56 | } | 23 | } |
57 | #ifdef PPC_FEATURE2_ARCH_3_00 | 24 | @@ -XXX,XX +XXX,XX @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv) |
58 | if (hwcap2 & PPC_FEATURE2_ARCH_3_00) { | 25 | * another process, because the fallback start_exclusive solution |
59 | - have_isa_3_00 = true; | 26 | * provides no protection across processes. |
60 | + have_isa = tcg_isa_3_00; | 27 | */ |
28 | - if (!page_check_range(h2g(p), 16, PAGE_WRITE)) { | ||
29 | + if (!page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) { | ||
30 | return *p; | ||
61 | } | 31 | } |
62 | #endif | 32 | #endif |
63 | |||
64 | -- | 33 | -- |
65 | 2.17.1 | 34 | 2.34.1 |
66 | |||
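The distinction the fix depends on can be shown with a self-contained sketch (the flag values below are invented; the real flags are tested through page_check_range() as in the hunks above). A page the guest mapped writable keeps PAGE_WRITE_ORG even while QEMU temporarily clears PAGE_WRITE to trap writes into translated code, so "can anyone else modify this memory?" has to test the _ORG bit.

```c
#include <stdbool.h>
#include <stdio.h>

enum { PAGE_WRITE = 1 << 0, PAGE_WRITE_ORG = 1 << 1 };  /* invented values */

/* A plain (possibly torn) load is acceptable only if no writer can race us. */
static bool plain_load_is_safe(int flags)
{
    return !(flags & PAGE_WRITE_ORG);
}

int main(void)
{
    /* Writable page, currently write-protected only because it contains
     * translated code: PAGE_WRITE is clear but PAGE_WRITE_ORG is set. */
    int tb_protected = PAGE_WRITE_ORG;

    /* Page the guest really mapped read-only. */
    int readonly = 0;

    printf("tb-protected page: plain load ok? %d\n", plain_load_is_safe(tb_protected)); /* 0 */
    printf("read-only page:    plain load ok? %d\n", plain_load_is_safe(readonly));     /* 1 */
    return 0;
}
```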
1 | These new instructions are conditional on MSR.VEC for TX=1, | 1 | Older versions of clang have missing runtime functions for arithmetic |
---|---|---|---|
2 | so we can consider these Altivec instructions. | 2 | with -fsanitize=undefined (see 464e3671f9d5c), so we cannot use |
3 | __int128_t for implementing Int128. But __int128_t is present, | ||
4 | data movement works, and it can be used for atomic128. | ||
3 | 5 | ||
4 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | 6 | Probe for both CONFIG_INT128_TYPE and CONFIG_INT128, adjust |
7 | qemu/int128.h to define Int128Alias if CONFIG_INT128_TYPE, | ||
8 | and adjust the meson probe for atomics to use has_int128_type. | ||
9 | |||
10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | --- | 12 | --- |
7 | tcg/ppc/tcg-target.inc.c | 28 ++++++++++++++++++++++++++-- | 13 | meson.build | 15 ++++++++++----- |
8 | 1 file changed, 26 insertions(+), 2 deletions(-) | 14 | include/qemu/int128.h | 4 ++-- |
15 | 2 files changed, 12 insertions(+), 7 deletions(-) | ||
9 | 16 | ||
10 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 17 | diff --git a/meson.build b/meson.build |
11 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/tcg/ppc/tcg-target.inc.c | 19 | --- a/meson.build |
13 | +++ b/tcg/ppc/tcg-target.inc.c | 20 | +++ b/meson.build |
14 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 21 | @@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_ATOMIC64', cc.links(''' |
15 | 22 | return 0; | |
16 | #define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */ | 23 | }''')) |
17 | #define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */ | 24 | |
18 | +#define XXSPLTIB (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */ | 25 | -has_int128 = cc.links(''' |
19 | 26 | +has_int128_type = cc.compiles(''' | |
20 | #define MFVSRD (XO31(51) | 1) /* v2.07, force sx=1 */ | 27 | + __int128_t a; |
21 | #define MFVSRWZ (XO31(115) | 1) /* v2.07, force sx=1 */ | 28 | + __uint128_t b; |
22 | #define MTVSRD (XO31(179) | 1) /* v2.07, force tx=1 */ | 29 | + int main(void) { b = a; }''') |
23 | #define MTVSRWZ (XO31(243) | 1) /* v2.07, force tx=1 */ | 30 | +config_host_data.set('CONFIG_INT128_TYPE', has_int128_type) |
24 | +#define MTVSRDD (XO31(435) | 1) /* v3.00, force tx=1 */ | 31 | + |
25 | +#define MTVSRWS (XO31(403) | 1) /* v3.00, force tx=1 */ | 32 | +has_int128 = has_int128_type and cc.links(''' |
26 | 33 | __int128_t a; | |
27 | #define RT(r) ((r)<<21) | 34 | __uint128_t b; |
28 | #define RS(r) ((r)<<21) | 35 | int main (void) { |
29 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, | 36 | @@ -XXX,XX +XXX,XX @@ has_int128 = cc.links(''' |
30 | return; | 37 | a = a * a; |
38 | return 0; | ||
39 | }''') | ||
40 | - | ||
41 | config_host_data.set('CONFIG_INT128', has_int128) | ||
42 | |||
43 | -if has_int128 | ||
44 | +if has_int128_type | ||
45 | # "do we have 128-bit atomics which are handled inline and specifically not | ||
46 | # via libatomic". The reason we can't use libatomic is documented in the | ||
47 | # comment starting "GCC is a house divided" in include/qemu/atomic128.h. | ||
48 | @@ -XXX,XX +XXX,XX @@ if has_int128 | ||
49 | # __alignof(unsigned __int128) for the host. | ||
50 | atomic_test_128 = ''' | ||
51 | int main(int ac, char **av) { | ||
52 | - unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], 16); | ||
53 | + __uint128_t *p = __builtin_assume_aligned(av[ac - 1], 16); | ||
54 | p[1] = __atomic_load_n(&p[0], __ATOMIC_RELAXED); | ||
55 | __atomic_store_n(&p[2], p[3], __ATOMIC_RELAXED); | ||
56 | __atomic_compare_exchange_n(&p[4], &p[5], p[6], 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); | ||
57 | @@ -XXX,XX +XXX,XX @@ if has_int128 | ||
58 | config_host_data.set('CONFIG_CMPXCHG128', cc.links(''' | ||
59 | int main(void) | ||
60 | { | ||
61 | - unsigned __int128 x = 0, y = 0; | ||
62 | + __uint128_t x = 0, y = 0; | ||
63 | __sync_val_compare_and_swap_16(&x, y, x); | ||
64 | return 0; | ||
31 | } | 65 | } |
32 | } | 66 | diff --git a/include/qemu/int128.h b/include/qemu/int128.h |
33 | + if (have_isa_3_00 && val == (tcg_target_long)dup_const(MO_8, val)) { | 67 | index XXXXXXX..XXXXXXX 100644 |
34 | + tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11)); | 68 | --- a/include/qemu/int128.h |
35 | + return; | 69 | +++ b/include/qemu/int128.h |
36 | + } | 70 | @@ -XXX,XX +XXX,XX @@ static inline void bswap128s(Int128 *s) |
37 | 71 | * a possible structure and the native types. Ease parameter passing | |
38 | /* | 72 | * via use of the transparent union extension. |
39 | * Otherwise we must load the value from the constant pool. | 73 | */ |
40 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, | 74 | -#ifdef CONFIG_INT128 |
41 | TCGReg dst, TCGReg src) | 75 | +#ifdef CONFIG_INT128_TYPE |
42 | { | 76 | typedef union { |
43 | tcg_debug_assert(dst >= TCG_REG_V0); | 77 | __uint128_t u; |
44 | - tcg_debug_assert(src >= TCG_REG_V0); | 78 | __int128_t i; |
45 | + | 79 | @@ -XXX,XX +XXX,XX @@ typedef union { |
46 | + /* Splat from integer reg allowed via constraints for v3.00. */ | 80 | } Int128Alias __attribute__((transparent_union)); |
47 | + if (src < TCG_REG_V0) { | 81 | #else |
48 | + tcg_debug_assert(have_isa_3_00); | 82 | typedef Int128 Int128Alias; |
49 | + switch (vece) { | 83 | -#endif /* CONFIG_INT128 */ |
50 | + case MO_64: | 84 | +#endif /* CONFIG_INT128_TYPE */ |
51 | + tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src)); | 85 | |
52 | + return true; | 86 | #endif /* INT128_H */ |
53 | + case MO_32: | ||
54 | + tcg_out32(s, MTVSRWS | VRT(dst) | RA(src)); | ||
55 | + return true; | ||
56 | + default: | ||
57 | + /* Fail, so that we fall back on either dupm or mov+dup. */ | ||
58 | + return false; | ||
59 | + } | ||
60 | + } | ||
61 | |||
62 | /* | ||
63 | * Recall we use (or emulate) VSX integer loads, so the integer is | ||
64 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
65 | static const TCGTargetOpDef sub2 | ||
66 | = { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } }; | ||
67 | static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } }; | ||
68 | + static const TCGTargetOpDef v_vr = { .args_ct_str = { "v", "vr" } }; | ||
69 | static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } }; | ||
70 | static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } }; | ||
71 | static const TCGTargetOpDef v_v_v_v | ||
72 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
73 | return &v_v_v; | ||
74 | case INDEX_op_not_vec: | ||
75 | case INDEX_op_neg_vec: | ||
76 | - case INDEX_op_dup_vec: | ||
77 | return &v_v; | ||
78 | + case INDEX_op_dup_vec: | ||
79 | + return have_isa_3_00 ? &v_vr : &v_v; | ||
80 | case INDEX_op_ld_vec: | ||
81 | case INDEX_op_st_vec: | ||
82 | case INDEX_op_dupm_vec: | ||
83 | -- | 87 | -- |
84 | 2.17.1 | 88 | 2.34.1 |
85 | |||
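A side note on why the type-only probe is sufficient for the Int128Alias change above: the union is used purely to move 128-bit values between the struct representation and __int128_t (for instance when calling 16-byte atomic built-ins); no 128-bit arithmetic goes through it, so the runtime helpers missing under -fsanitize=undefined never come into play. A rough standalone illustration for a 64-bit host, with a simplified stand-in for QEMU's struct-based Int128:

```c
#include <inttypes.h>
#include <stdio.h>

/* Simplified stand-in for the !CONFIG_INT128 struct layout. */
typedef struct { uint64_t lo, hi; } Int128;

/* Mirrors the qemu/int128.h union that is now guarded by CONFIG_INT128_TYPE. */
typedef union {
    __uint128_t u;
    __int128_t  i;
    Int128      s;
} Int128Alias __attribute__((transparent_union));

int main(void)
{
    Int128 a = { .lo = 1, .hi = 2 };
    Int128Alias x = { .s = a };

    /* Data movement only: reinterpret as __uint128_t, copy, unpack again. */
    __uint128_t raw = x.u;
    Int128Alias y = { .u = raw };

    printf("lo=%" PRIu64 " hi=%" PRIu64 "\n", y.s.lo, y.s.hi);
    return 0;
}
```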
1 | These new instructions are a mix of those like LXSD that are | 1 | With CPUINFO_ATOMIC_VMOVDQA, we can perform proper atomic |
---|---|---|---|
2 | conditional only on MSR.VEC and those like LXV that are | 2 | load/store without cmpxchg16b. |
3 | conditional on MSR.VEC for TX=1. Thus, in the end, we can | ||
4 | consider all of these as Altivec instructions. | ||
5 | 3 | ||
6 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | 4 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | --- | 6 | --- |
9 | tcg/ppc/tcg-target.inc.c | 47 ++++++++++++++++++++++++++++++++-------- | 7 | host/include/x86_64/host/atomic128-ldst.h | 68 +++++++++++++++++++++++ |
10 | 1 file changed, 38 insertions(+), 9 deletions(-) | 8 | 1 file changed, 68 insertions(+) |
9 | create mode 100644 host/include/x86_64/host/atomic128-ldst.h | ||
11 | 10 | ||
12 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 11 | diff --git a/host/include/x86_64/host/atomic128-ldst.h b/host/include/x86_64/host/atomic128-ldst.h |
13 | index XXXXXXX..XXXXXXX 100644 | 12 | new file mode 100644 |
14 | --- a/tcg/ppc/tcg-target.inc.c | 13 | index XXXXXXX..XXXXXXX |
15 | +++ b/tcg/ppc/tcg-target.inc.c | 14 | --- /dev/null |
16 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 15 | +++ b/host/include/x86_64/host/atomic128-ldst.h |
17 | #define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */ | 16 | @@ -XXX,XX +XXX,XX @@ |
18 | #define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */ | 17 | +/* |
19 | #define LXSIWZX (XO31(12) | 1) /* v2.07, force tx=1 */ | 18 | + * SPDX-License-Identifier: GPL-2.0-or-later |
20 | +#define LXV (OPCD(61) | 8 | 1) /* v3.00, force tx=1 */ | 19 | + * Load/store for 128-bit atomic operations, x86_64 version. |
21 | +#define LXSD (OPCD(57) | 2) /* v3.00 */ | 20 | + * |
22 | +#define LXVWSX (XO31(364) | 1) /* v3.00, force tx=1 */ | 21 | + * Copyright (C) 2023 Linaro, Ltd. |
23 | 22 | + * | |
24 | #define STVX XO31(231) | 23 | + * See docs/devel/atomics.rst for discussion about the guarantees each |
25 | #define STVEWX XO31(199) | 24 | + * atomic primitive is meant to provide. |
26 | #define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */ | 25 | + */ |
27 | #define STXSIWX (XO31(140) | 1) /* v2.07, force sx=1 */ | 26 | + |
28 | +#define STXV (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */ | 27 | +#ifndef AARCH64_ATOMIC128_LDST_H |
29 | +#define STXSD (OPCD(61) | 2) /* v3.00 */ | 28 | +#define AARCH64_ATOMIC128_LDST_H |
30 | 29 | + | |
31 | #define VADDSBS VX4(768) | 30 | +#ifdef CONFIG_INT128_TYPE |
32 | #define VADDUBS VX4(512) | 31 | +#include "host/cpuinfo.h" |
33 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | 32 | +#include "tcg/debug-assert.h" |
34 | TCGReg base, tcg_target_long offset) | 33 | + |
35 | { | 34 | +/* |
36 | tcg_target_long orig = offset, l0, l1, extra = 0, align = 0; | 35 | + * Through clang 16, with -mcx16, __atomic_load_n is incorrectly |
37 | - bool is_store = false; | 36 | + * expanded to a read-write operation: lock cmpxchg16b. |
38 | + bool is_int_store = false; | 37 | + */ |
39 | TCGReg rs = TCG_REG_TMP1; | 38 | + |
40 | 39 | +#define HAVE_ATOMIC128_RO likely(cpuinfo & CPUINFO_ATOMIC_VMOVDQA) | |
41 | switch (opi) { | 40 | +#define HAVE_ATOMIC128_RW 1 |
42 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | 41 | + |
43 | break; | 42 | +static inline Int128 atomic16_read_ro(const Int128 *ptr) |
44 | } | 43 | +{ |
45 | break; | 44 | + Int128Alias r; |
46 | + case LXSD: | 45 | + |
47 | + case STXSD: | 46 | + tcg_debug_assert(HAVE_ATOMIC128_RO); |
48 | + align = 3; | 47 | + asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr)); |
49 | + break; | 48 | + |
50 | + case LXV: | 49 | + return r.s; |
51 | + case STXV: | 50 | +} |
52 | + align = 15; | 51 | + |
53 | + break; | 52 | +static inline Int128 atomic16_read_rw(Int128 *ptr) |
54 | case STD: | 53 | +{ |
55 | align = 3; | 54 | + __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16); |
56 | /* FALLTHRU */ | 55 | + Int128Alias r; |
57 | case STB: case STH: case STW: | 56 | + |
58 | - is_store = true; | 57 | + if (HAVE_ATOMIC128_RO) { |
59 | + is_int_store = true; | 58 | + asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr_align)); |
60 | break; | 59 | + } else { |
61 | } | 60 | + r.i = __sync_val_compare_and_swap_16(ptr_align, 0, 0); |
62 | 61 | + } | |
63 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | 62 | + return r.s; |
64 | if (rs == base) { | 63 | +} |
65 | rs = TCG_REG_R0; | 64 | + |
66 | } | 65 | +static inline void atomic16_set(Int128 *ptr, Int128 val) |
67 | - tcg_debug_assert(!is_store || rs != rt); | 66 | +{ |
68 | + tcg_debug_assert(!is_int_store || rs != rt); | 67 | + __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16); |
69 | tcg_out_movi(s, TCG_TYPE_PTR, rs, orig); | 68 | + Int128Alias new = { .s = val }; |
70 | tcg_out32(s, opx | TAB(rt & 31, base, rs)); | 69 | + |
71 | return; | 70 | + if (HAVE_ATOMIC128_RO) { |
72 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | 71 | + asm("vmovdqa %1, %0" : "=m"(*ptr_align) : "x" (new.i)); |
73 | case TCG_TYPE_V64: | 72 | + } else { |
74 | tcg_debug_assert(ret >= TCG_REG_V0); | 73 | + __int128_t old; |
75 | if (have_vsx) { | 74 | + do { |
76 | - tcg_out_mem_long(s, 0, LXSDX, ret, base, offset); | 75 | + old = *ptr_align; |
77 | + tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX, | 76 | + } while (!__sync_bool_compare_and_swap_16(ptr_align, old, new.i)); |
78 | + ret, base, offset); | 77 | + } |
79 | break; | 78 | +} |
80 | } | 79 | +#else |
81 | tcg_debug_assert((offset & 7) == 0); | 80 | +/* Provide QEMU_ERROR stubs. */ |
82 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | 81 | +#include "host/include/generic/host/atomic128-ldst.h" |
83 | case TCG_TYPE_V128: | 82 | +#endif |
84 | tcg_debug_assert(ret >= TCG_REG_V0); | 83 | + |
85 | tcg_debug_assert((offset & 15) == 0); | 84 | +#endif /* AARCH64_ATOMIC128_LDST_H */ |
86 | - tcg_out_mem_long(s, 0, LVX, ret, base, offset); | ||
87 | + tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0, | ||
88 | + LVX, ret, base, offset); | ||
89 | break; | ||
90 | default: | ||
91 | g_assert_not_reached(); | ||
92 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | ||
93 | case TCG_TYPE_V64: | ||
94 | tcg_debug_assert(arg >= TCG_REG_V0); | ||
95 | if (have_vsx) { | ||
96 | - tcg_out_mem_long(s, 0, STXSDX, arg, base, offset); | ||
97 | + tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0, | ||
98 | + STXSDX, arg, base, offset); | ||
99 | break; | ||
100 | } | ||
101 | tcg_debug_assert((offset & 7) == 0); | ||
102 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | ||
103 | break; | ||
104 | case TCG_TYPE_V128: | ||
105 | tcg_debug_assert(arg >= TCG_REG_V0); | ||
106 | - tcg_out_mem_long(s, 0, STVX, arg, base, offset); | ||
107 | + tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0, | ||
108 | + STVX, arg, base, offset); | ||
109 | break; | ||
110 | default: | ||
111 | g_assert_not_reached(); | ||
112 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, | ||
113 | tcg_debug_assert(out >= TCG_REG_V0); | ||
114 | switch (vece) { | ||
115 | case MO_8: | ||
116 | - tcg_out_mem_long(s, 0, LVEBX, out, base, offset); | ||
117 | + if (have_isa_3_00) { | ||
118 | + tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16); | ||
119 | + } else { | ||
120 | + tcg_out_mem_long(s, 0, LVEBX, out, base, offset); | ||
121 | + } | ||
122 | elt = extract32(offset, 0, 4); | ||
123 | #ifndef HOST_WORDS_BIGENDIAN | ||
124 | elt ^= 15; | ||
125 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, | ||
126 | break; | ||
127 | case MO_16: | ||
128 | tcg_debug_assert((offset & 1) == 0); | ||
129 | - tcg_out_mem_long(s, 0, LVEHX, out, base, offset); | ||
130 | + if (have_isa_3_00) { | ||
131 | + tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16); | ||
132 | + } else { | ||
133 | + tcg_out_mem_long(s, 0, LVEHX, out, base, offset); | ||
134 | + } | ||
135 | elt = extract32(offset, 1, 3); | ||
136 | #ifndef HOST_WORDS_BIGENDIAN | ||
137 | elt ^= 7; | ||
138 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, | ||
139 | tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16)); | ||
140 | break; | ||
141 | case MO_32: | ||
142 | + if (have_isa_3_00) { | ||
143 | + tcg_out_mem_long(s, 0, LXVWSX, out, base, offset); | ||
144 | + break; | ||
145 | + } | ||
146 | tcg_debug_assert((offset & 3) == 0); | ||
147 | tcg_out_mem_long(s, 0, LVEWX, out, base, offset); | ||
148 | elt = extract32(offset, 2, 2); | ||
149 | -- | 85 | -- |
150 | 2.17.1 | 86 | 2.34.1 |
151 | 87 | ||
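Condensed into a standalone sketch (x86-64, GCC or clang with -mcx16 assumed; the names are illustrative rather than QEMU's): the header above chooses between an aligned AVX load, usable when the cpuinfo probe behind CPUINFO_ATOMIC_VMOVDQA says it is atomic, and a compare-and-swap fallback, which returns the old value but is a read-modify-write and therefore requires a writable mapping; that is why a separate read-only entry point exists at all.

```c
#include <stdbool.h>

typedef __int128 i128;

/* Atomic 16-byte read via an aligned AVX load; only valid on CPUs that
 * provide the guarantee probed as CPUINFO_ATOMIC_VMOVDQA. */
static inline i128 read16_vmovdqa(const i128 *ptr)
{
    i128 r;
    asm("vmovdqa %1, %0" : "=x"(r) : "m"(*ptr));
    return r;
}

/* Fallback: compare-and-swap with an expected value of 0 returns the old
 * contents either way, but it issues a store, so the page must be writable. */
static inline i128 read16_cmpxchg(i128 *ptr)
{
    return __sync_val_compare_and_swap(ptr, (i128)0, (i128)0);
}

i128 read16(i128 *ptr, bool have_atomic_vmovdqa)
{
    return have_atomic_vmovdqa ? read16_vmovdqa(ptr) : read16_cmpxchg(ptr);
}

int main(void)
{
    static _Alignas(16) i128 val = 42;
    return (int)read16(&val, false);    /* take the cmpxchg fallback path */
}
```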
1 | The VSX instruction set includes double-word loads and | 1 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
---|---|---|---|
2 | stores, double-word load and splat, double-word permute, and bit | 2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
3 | select. All of which require multiple operations in the Altivec | 3 | --- |
4 | instruction set. | 4 | tcg/i386/tcg-target.h | 4 +- |
5 | tcg/i386/tcg-target.c.inc | 191 +++++++++++++++++++++++++++++++++++++- | ||
6 | 2 files changed, 190 insertions(+), 5 deletions(-) | ||
5 | 7 | ||
6 | Because the VSX registers map %vsr32 to %vr0, and we have no current | 8 | diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h |
7 | intention or need to use vector registers outside %vr0-%vr19, force | ||
8 | on the {ax,bx,cx,tx} bits within the added VSX insns so that we don't | ||
9 | have to otherwise modify the VR[TABC] macros. | ||
10 | |||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
13 | --- | ||
14 | tcg/ppc/tcg-target.h | 5 ++-- | ||
15 | tcg/ppc/tcg-target.inc.c | 52 ++++++++++++++++++++++++++++++++++++---- | ||
16 | 2 files changed, 51 insertions(+), 6 deletions(-) | ||
17 | |||
18 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/tcg/ppc/tcg-target.h | 10 | --- a/tcg/i386/tcg-target.h |
21 | +++ b/tcg/ppc/tcg-target.h | 11 | +++ b/tcg/i386/tcg-target.h |
22 | @@ -XXX,XX +XXX,XX @@ typedef enum { | 12 | @@ -XXX,XX +XXX,XX @@ typedef enum { |
23 | 13 | #define have_avx1 (cpuinfo & CPUINFO_AVX1) | |
24 | extern TCGPowerISA have_isa; | 14 | #define have_avx2 (cpuinfo & CPUINFO_AVX2) |
25 | extern bool have_altivec; | 15 | #define have_movbe (cpuinfo & CPUINFO_MOVBE) |
26 | +extern bool have_vsx; | 16 | -#define have_atomic16 (cpuinfo & CPUINFO_ATOMIC_VMOVDQA) |
27 | 17 | ||
28 | #define have_isa_2_06 (have_isa >= tcg_isa_2_06) | 18 | /* |
29 | #define have_isa_3_00 (have_isa >= tcg_isa_3_00) | 19 | * There are interesting instructions in AVX512, so long as we have AVX512VL, |
30 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | 20 | @@ -XXX,XX +XXX,XX @@ typedef enum { |
31 | * instruction and substituting two 32-bit stores makes the generated | 21 | #define TCG_TARGET_HAS_qemu_st8_i32 1 |
32 | * code quite large. | 22 | #endif |
23 | |||
24 | -#define TCG_TARGET_HAS_qemu_ldst_i128 0 | ||
25 | +#define TCG_TARGET_HAS_qemu_ldst_i128 \ | ||
26 | + (TCG_TARGET_REG_BITS == 64 && (cpuinfo & CPUINFO_ATOMIC_VMOVDQA)) | ||
27 | |||
28 | /* We do not support older SSE systems, only beginning with AVX1. */ | ||
29 | #define TCG_TARGET_HAS_v64 have_avx1 | ||
30 | diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/tcg/i386/tcg-target.c.inc | ||
33 | +++ b/tcg/i386/tcg-target.c.inc | ||
34 | @@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = { | ||
35 | #endif | ||
36 | }; | ||
37 | |||
38 | +#define TCG_TMP_VEC TCG_REG_XMM5 | ||
39 | + | ||
40 | static const int tcg_target_call_iarg_regs[] = { | ||
41 | #if TCG_TARGET_REG_BITS == 64 | ||
42 | #if defined(_WIN64) | ||
43 | @@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) | ||
44 | #define OPC_PCMPGTW (0x65 | P_EXT | P_DATA16) | ||
45 | #define OPC_PCMPGTD (0x66 | P_EXT | P_DATA16) | ||
46 | #define OPC_PCMPGTQ (0x37 | P_EXT38 | P_DATA16) | ||
47 | +#define OPC_PEXTRD (0x16 | P_EXT3A | P_DATA16) | ||
48 | +#define OPC_PINSRD (0x22 | P_EXT3A | P_DATA16) | ||
49 | #define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16) | ||
50 | #define OPC_PMAXSW (0xee | P_EXT | P_DATA16) | ||
51 | #define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16) | ||
52 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
53 | |||
54 | bool tcg_target_has_memory_bswap(MemOp memop) | ||
55 | { | ||
56 | - return have_movbe; | ||
57 | + TCGAtomAlign aa; | ||
58 | + | ||
59 | + if (!have_movbe) { | ||
60 | + return false; | ||
61 | + } | ||
62 | + if ((memop & MO_SIZE) < MO_128) { | ||
63 | + return true; | ||
64 | + } | ||
65 | + | ||
66 | + /* | ||
67 | + * Reject 16-byte memop with 16-byte atomicity, i.e. VMOVDQA, | ||
68 | + * but do allow a pair of 64-bit operations, i.e. MOVBEQ. | ||
69 | + */ | ||
70 | + aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true); | ||
71 | + return aa.atom < MO_128; | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | @@ -XXX,XX +XXX,XX @@ static const TCGLdstHelperParam ldst_helper_param = { | ||
76 | static const TCGLdstHelperParam ldst_helper_param = { }; | ||
77 | #endif | ||
78 | |||
79 | +static void tcg_out_vec_to_pair(TCGContext *s, TCGType type, | ||
80 | + TCGReg l, TCGReg h, TCGReg v) | ||
81 | +{ | ||
82 | + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; | ||
83 | + | ||
84 | + /* vpmov{d,q} %v, %l */ | ||
85 | + tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, v, 0, l); | ||
86 | + /* vpextr{d,q} $1, %v, %h */ | ||
87 | + tcg_out_vex_modrm(s, OPC_PEXTRD + rexw, v, 0, h); | ||
88 | + tcg_out8(s, 1); | ||
89 | +} | ||
90 | + | ||
91 | +static void tcg_out_pair_to_vec(TCGContext *s, TCGType type, | ||
92 | + TCGReg v, TCGReg l, TCGReg h) | ||
93 | +{ | ||
94 | + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; | ||
95 | + | ||
96 | + /* vmov{d,q} %l, %v */ | ||
97 | + tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, v, 0, l); | ||
98 | + /* vpinsr{d,q} $1, %h, %v, %v */ | ||
99 | + tcg_out_vex_modrm(s, OPC_PINSRD + rexw, v, v, h); | ||
100 | + tcg_out8(s, 1); | ||
101 | +} | ||
102 | + | ||
103 | /* | ||
104 | * Generate code for the slow path for a load at the end of block | ||
33 | */ | 105 | */ |
34 | -#define TCG_TARGET_HAS_v64 0 | 106 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, |
35 | +#define TCG_TARGET_HAS_v64 have_vsx | 107 | { |
36 | #define TCG_TARGET_HAS_v128 have_altivec | 108 | TCGLabelQemuLdst *ldst = NULL; |
37 | #define TCG_TARGET_HAS_v256 0 | 109 | MemOp opc = get_memop(oi); |
38 | 110 | + MemOp s_bits = opc & MO_SIZE; | |
39 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | 111 | unsigned a_mask; |
40 | #define TCG_TARGET_HAS_mul_vec 1 | 112 | |
41 | #define TCG_TARGET_HAS_sat_vec 1 | 113 | #ifdef CONFIG_SOFTMMU |
42 | #define TCG_TARGET_HAS_minmax_vec 1 | 114 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, |
43 | -#define TCG_TARGET_HAS_bitsel_vec 0 | 115 | *h = x86_guest_base; |
44 | +#define TCG_TARGET_HAS_bitsel_vec have_vsx | 116 | #endif |
45 | #define TCG_TARGET_HAS_cmpsel_vec 0 | 117 | h->base = addrlo; |
46 | 118 | - h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false); | |
47 | void flush_icache_range(uintptr_t start, uintptr_t stop); | 119 | + h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128); |
48 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 120 | a_mask = (1 << h->aa.align) - 1; |
49 | index XXXXXXX..XXXXXXX 100644 | 121 | |
50 | --- a/tcg/ppc/tcg-target.inc.c | 122 | #ifdef CONFIG_SOFTMMU |
51 | +++ b/tcg/ppc/tcg-target.inc.c | 123 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, |
52 | @@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr; | 124 | TCGType tlbtype = TCG_TYPE_I32; |
53 | TCGPowerISA have_isa; | 125 | int trexw = 0, hrexw = 0, tlbrexw = 0; |
54 | static bool have_isel; | 126 | unsigned mem_index = get_mmuidx(oi); |
55 | bool have_altivec; | 127 | - unsigned s_bits = opc & MO_SIZE; |
56 | +bool have_vsx; | 128 | unsigned s_mask = (1 << s_bits) - 1; |
57 | 129 | int tlb_mask; | |
58 | #ifndef CONFIG_SOFTMMU | 130 | |
59 | #define TCG_GUEST_BASE_REG 30 | 131 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, |
60 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 132 | h.base, h.index, 0, h.ofs + 4); |
61 | #define LVEBX XO31(7) | 133 | } |
62 | #define LVEHX XO31(39) | 134 | break; |
63 | #define LVEWX XO31(71) | 135 | + |
64 | +#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */ | 136 | + case MO_128: |
65 | +#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */ | 137 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); |
66 | 138 | + | |
67 | #define STVX XO31(231) | 139 | + /* |
68 | #define STVEWX XO31(199) | 140 | + * Without 16-byte atomicity, use integer regs. |
69 | +#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */ | 141 | + * That is where we want the data, and it allows bswaps. |
70 | 142 | + */ | |
71 | #define VADDSBS VX4(768) | 143 | + if (h.aa.atom < MO_128) { |
72 | #define VADDUBS VX4(512) | 144 | + if (use_movbe) { |
73 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 145 | + TCGReg t = datalo; |
74 | 146 | + datalo = datahi; | |
75 | #define VSLDOI VX4(44) | 147 | + datahi = t; |
76 | 148 | + } | |
77 | +#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */ | 149 | + if (h.base == datalo || h.index == datalo) { |
78 | +#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */ | 150 | + tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, datahi, |
79 | + | 151 | + h.base, h.index, 0, h.ofs); |
80 | #define RT(r) ((r)<<21) | 152 | + tcg_out_modrm_offset(s, movop + P_REXW + h.seg, |
81 | #define RS(r) ((r)<<21) | 153 | + datalo, datahi, 0); |
82 | #define RA(r) ((r)<<16) | 154 | + tcg_out_modrm_offset(s, movop + P_REXW + h.seg, |
83 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, | 155 | + datahi, datahi, 8); |
84 | add = 0; | 156 | + } else { |
85 | } | 157 | + tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo, |
86 | 158 | + h.base, h.index, 0, h.ofs); | |
87 | - load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); | 159 | + tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi, |
88 | - if (TCG_TARGET_REG_BITS == 64) { | 160 | + h.base, h.index, 0, h.ofs + 8); |
89 | - new_pool_l2(s, rel, s->code_ptr, add, val, val); | 161 | + } |
90 | + if (have_vsx) { | ||
91 | + load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX; | ||
92 | + load_insn |= VRT(ret) | RB(TCG_REG_TMP1); | ||
93 | + if (TCG_TARGET_REG_BITS == 64) { | ||
94 | + new_pool_label(s, val, rel, s->code_ptr, add); | ||
95 | + } else { | ||
96 | + new_pool_l2(s, rel, s->code_ptr, add, val, val); | ||
97 | + } | ||
98 | } else { | ||
99 | - new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val); | ||
100 | + load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); | ||
101 | + if (TCG_TARGET_REG_BITS == 64) { | ||
102 | + new_pool_l2(s, rel, s->code_ptr, add, val, val); | ||
103 | + } else { | ||
104 | + new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val); | ||
105 | + } | ||
106 | } | ||
107 | |||
108 | if (USE_REG_TB) { | ||
109 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | ||
110 | /* fallthru */ | ||
111 | case TCG_TYPE_V64: | ||
112 | tcg_debug_assert(ret >= TCG_REG_V0); | ||
113 | + if (have_vsx) { | ||
114 | + tcg_out_mem_long(s, 0, LXSDX, ret, base, offset); | ||
115 | + break; | 162 | + break; |
116 | + } | 163 | + } |
117 | tcg_debug_assert((offset & 7) == 0); | 164 | + |
118 | tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16); | 165 | + /* |
119 | if (offset & 8) { | 166 | + * With 16-byte atomicity, a vector load is required. |
120 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | 167 | + * If we already have 16-byte alignment, then VMOVDQA always works. |
121 | /* fallthru */ | 168 | + * Else if VMOVDQU has atomicity with dynamic alignment, use that. |
122 | case TCG_TYPE_V64: | 169 | + * Else use we require a runtime test for alignment for VMOVDQA; |
123 | tcg_debug_assert(arg >= TCG_REG_V0); | 170 | + * use VMOVDQU on the unaligned nonatomic path for simplicity. |
124 | + if (have_vsx) { | 171 | + */ |
125 | + tcg_out_mem_long(s, 0, STXSDX, arg, base, offset); | 172 | + if (h.aa.align >= MO_128) { |
173 | + tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg, | ||
174 | + TCG_TMP_VEC, 0, | ||
175 | + h.base, h.index, 0, h.ofs); | ||
176 | + } else if (cpuinfo & CPUINFO_ATOMIC_VMOVDQU) { | ||
177 | + tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_VxWx + h.seg, | ||
178 | + TCG_TMP_VEC, 0, | ||
179 | + h.base, h.index, 0, h.ofs); | ||
180 | + } else { | ||
181 | + TCGLabel *l1 = gen_new_label(); | ||
182 | + TCGLabel *l2 = gen_new_label(); | ||
183 | + | ||
184 | + tcg_out_testi(s, h.base, 15); | ||
185 | + tcg_out_jxx(s, JCC_JNE, l1, true); | ||
186 | + | ||
187 | + tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg, | ||
188 | + TCG_TMP_VEC, 0, | ||
189 | + h.base, h.index, 0, h.ofs); | ||
190 | + tcg_out_jxx(s, JCC_JMP, l2, true); | ||
191 | + | ||
192 | + tcg_out_label(s, l1); | ||
193 | + tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_VxWx + h.seg, | ||
194 | + TCG_TMP_VEC, 0, | ||
195 | + h.base, h.index, 0, h.ofs); | ||
196 | + tcg_out_label(s, l2); | ||
197 | + } | ||
198 | + tcg_out_vec_to_pair(s, TCG_TYPE_I64, datalo, datahi, TCG_TMP_VEC); | ||
199 | + break; | ||
200 | + | ||
201 | default: | ||
202 | g_assert_not_reached(); | ||
203 | } | ||
204 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, | ||
205 | h.base, h.index, 0, h.ofs + 4); | ||
206 | } | ||
207 | break; | ||
208 | + | ||
209 | + case MO_128: | ||
210 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
211 | + | ||
212 | + /* | ||
213 | + * Without 16-byte atomicity, use integer regs. | ||
214 | + * That is where we have the data, and it allows bswaps. | ||
215 | + */ | ||
216 | + if (h.aa.atom < MO_128) { | ||
217 | + if (use_movbe) { | ||
218 | + TCGReg t = datalo; | ||
219 | + datalo = datahi; | ||
220 | + datahi = t; | ||
221 | + } | ||
222 | + tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo, | ||
223 | + h.base, h.index, 0, h.ofs); | ||
224 | + tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi, | ||
225 | + h.base, h.index, 0, h.ofs + 8); | ||
126 | + break; | 226 | + break; |
127 | + } | 227 | + } |
128 | tcg_debug_assert((offset & 7) == 0); | 228 | + |
129 | if (offset & 8) { | 229 | + /* |
130 | tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8); | 230 | + * With 16-byte atomicity, a vector store is required. |
131 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | 231 | + * If we already have 16-byte alignment, then VMOVDQA always works. |
132 | case INDEX_op_shri_vec: | 232 | + * Else if VMOVDQU has atomicity with dynamic alignment, use that. |
133 | case INDEX_op_sari_vec: | 233 | + * Else use we require a runtime test for alignment for VMOVDQA; |
134 | return vece <= MO_32 ? -1 : 0; | 234 | + * use VMOVDQU on the unaligned nonatomic path for simplicity. |
135 | + case INDEX_op_bitsel_vec: | 235 | + */ |
136 | + return have_vsx; | 236 | + tcg_out_pair_to_vec(s, TCG_TYPE_I64, TCG_TMP_VEC, datalo, datahi); |
237 | + if (h.aa.align >= MO_128) { | ||
238 | + tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg, | ||
239 | + TCG_TMP_VEC, 0, | ||
240 | + h.base, h.index, 0, h.ofs); | ||
241 | + } else if (cpuinfo & CPUINFO_ATOMIC_VMOVDQU) { | ||
242 | + tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_WxVx + h.seg, | ||
243 | + TCG_TMP_VEC, 0, | ||
244 | + h.base, h.index, 0, h.ofs); | ||
245 | + } else { | ||
246 | + TCGLabel *l1 = gen_new_label(); | ||
247 | + TCGLabel *l2 = gen_new_label(); | ||
248 | + | ||
249 | + tcg_out_testi(s, h.base, 15); | ||
250 | + tcg_out_jxx(s, JCC_JNE, l1, true); | ||
251 | + | ||
252 | + tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg, | ||
253 | + TCG_TMP_VEC, 0, | ||
254 | + h.base, h.index, 0, h.ofs); | ||
255 | + tcg_out_jxx(s, JCC_JMP, l2, true); | ||
256 | + | ||
257 | + tcg_out_label(s, l1); | ||
258 | + tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_WxVx + h.seg, | ||
259 | + TCG_TMP_VEC, 0, | ||
260 | + h.base, h.index, 0, h.ofs); | ||
261 | + tcg_out_label(s, l2); | ||
262 | + } | ||
263 | + break; | ||
264 | + | ||
137 | default: | 265 | default: |
138 | return 0; | 266 | g_assert_not_reached(); |
139 | } | 267 | } |
140 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, | 268 | @@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, |
141 | tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16)); | 269 | tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64); |
142 | break; | ||
143 | case MO_64: | ||
144 | + if (have_vsx) { | ||
145 | + tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src)); | ||
146 | + break; | ||
147 | + } | ||
148 | tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8); | ||
149 | tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8); | ||
150 | break; | ||
151 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, | ||
152 | tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16)); | ||
153 | break; | ||
154 | case MO_64: | ||
155 | + if (have_vsx) { | ||
156 | + tcg_out_mem_long(s, 0, LXVDSX, out, base, offset); | ||
157 | + break; | ||
158 | + } | ||
159 | tcg_debug_assert((offset & 7) == 0); | ||
160 | tcg_out_mem_long(s, 0, LVX, out, base, offset & -16); | ||
161 | tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8); | ||
162 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
163 | } | 270 | } |
164 | break; | 271 | break; |
165 | 272 | + case INDEX_op_qemu_ld_a32_i128: | |
166 | + case INDEX_op_bitsel_vec: | 273 | + case INDEX_op_qemu_ld_a64_i128: |
167 | + tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3])); | 274 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); |
168 | + return; | 275 | + tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128); |
169 | + | 276 | + break; |
170 | case INDEX_op_dup2_vec: | 277 | |
171 | assert(TCG_TARGET_REG_BITS == 32); | 278 | case INDEX_op_qemu_st_a64_i32: |
172 | /* With inputs a1 = xLxx, a2 = xHxx */ | 279 | case INDEX_op_qemu_st8_a64_i32: |
173 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | 280 | @@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, |
174 | case INDEX_op_st_vec: | 281 | tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64); |
175 | case INDEX_op_dupm_vec: | 282 | } |
176 | return &v_r; | 283 | break; |
177 | + case INDEX_op_bitsel_vec: | 284 | + case INDEX_op_qemu_st_a32_i128: |
178 | case INDEX_op_ppc_msum_vec: | 285 | + case INDEX_op_qemu_st_a64_i128: |
179 | return &v_v_v_v; | 286 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); |
287 | + tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128); | ||
288 | + break; | ||
289 | |||
290 | OP_32_64(mulu2): | ||
291 | tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]); | ||
292 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | ||
293 | case INDEX_op_qemu_st_a64_i64: | ||
294 | return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I4(L, L, L, L); | ||
295 | |||
296 | + case INDEX_op_qemu_ld_a32_i128: | ||
297 | + case INDEX_op_qemu_ld_a64_i128: | ||
298 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
299 | + return C_O2_I1(r, r, L); | ||
300 | + case INDEX_op_qemu_st_a32_i128: | ||
301 | + case INDEX_op_qemu_st_a64_i128: | ||
302 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
303 | + return C_O0_I3(L, L, L); | ||
304 | + | ||
305 | case INDEX_op_brcond2_i32: | ||
306 | return C_O0_I4(r, r, ri, ri); | ||
180 | 307 | ||
181 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | 308 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) |
182 | 309 | ||
183 | if (hwcap & PPC_FEATURE_HAS_ALTIVEC) { | 310 | s->reserved_regs = 0; |
184 | have_altivec = true; | 311 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); |
185 | + /* We only care about the portion of VSX that overlaps Altivec. */ | 312 | + tcg_regset_set_reg(s->reserved_regs, TCG_TMP_VEC); |
186 | + if (hwcap & PPC_FEATURE_HAS_VSX) { | 313 | #ifdef _WIN64 |
187 | + have_vsx = true; | 314 | /* These are call saved, and we don't save them, so don't use them. */ |
188 | + } | 315 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM6); |
189 | } | ||
190 | |||
191 | tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; | ||
192 | -- | 316 | -- |
193 | 2.17.1 | 317 | 2.34.1 |
194 | |||
195 | diff view generated by jsdifflib |
1 | Add various bits and pieces related mostly to load and store | 1 | We will need to allocate a second general-purpose temporary. |
---|---|---|---|
2 | operations. In that context, logic, compare, and splat Altivec | 2 | Rename the existing temps to add a distinguishing number. |
3 | instructions are used, and, therefore, the support for emitting | ||
4 | them is included in this patch too. | ||
5 | 3 | ||
4 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
8 | --- | 6 | --- |
9 | tcg/ppc/tcg-target.h | 6 +- | 7 | tcg/aarch64/tcg-target.c.inc | 50 ++++++++++++++++++------------------ |
10 | tcg/ppc/tcg-target.inc.c | 472 ++++++++++++++++++++++++++++++++++++--- | 8 | 1 file changed, 25 insertions(+), 25 deletions(-) |
11 | 2 files changed, 442 insertions(+), 36 deletions(-) | ||
12 | 9 | ||
13 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | 10 | diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc |
14 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/tcg/ppc/tcg-target.h | 12 | --- a/tcg/aarch64/tcg-target.c.inc |
16 | +++ b/tcg/ppc/tcg-target.h | 13 | +++ b/tcg/aarch64/tcg-target.c.inc |
17 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | 14 | @@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) |
18 | #define TCG_TARGET_HAS_v128 have_altivec | 15 | return TCG_REG_X0 + slot; |
19 | #define TCG_TARGET_HAS_v256 0 | 16 | } |
20 | 17 | ||
21 | -#define TCG_TARGET_HAS_andc_vec 0 | 18 | -#define TCG_REG_TMP TCG_REG_X30 |
22 | +#define TCG_TARGET_HAS_andc_vec 1 | 19 | -#define TCG_VEC_TMP TCG_REG_V31 |
23 | #define TCG_TARGET_HAS_orc_vec 0 | 20 | +#define TCG_REG_TMP0 TCG_REG_X30 |
24 | -#define TCG_TARGET_HAS_not_vec 0 | 21 | +#define TCG_VEC_TMP0 TCG_REG_V31 |
25 | +#define TCG_TARGET_HAS_not_vec 1 | 22 | |
26 | #define TCG_TARGET_HAS_neg_vec 0 | 23 | #ifndef CONFIG_SOFTMMU |
27 | #define TCG_TARGET_HAS_abs_vec 0 | 24 | #define TCG_REG_GUEST_BASE TCG_REG_X28 |
28 | #define TCG_TARGET_HAS_shi_vec 0 | 25 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, |
29 | #define TCG_TARGET_HAS_shs_vec 0 | 26 | static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, |
30 | #define TCG_TARGET_HAS_shv_vec 0 | 27 | TCGReg r, TCGReg base, intptr_t offset) |
31 | -#define TCG_TARGET_HAS_cmp_vec 0 | ||
32 | +#define TCG_TARGET_HAS_cmp_vec 1 | ||
33 | #define TCG_TARGET_HAS_mul_vec 0 | ||
34 | #define TCG_TARGET_HAS_sat_vec 0 | ||
35 | #define TCG_TARGET_HAS_minmax_vec 0 | ||
36 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
37 | index XXXXXXX..XXXXXXX 100644 | ||
38 | --- a/tcg/ppc/tcg-target.inc.c | ||
39 | +++ b/tcg/ppc/tcg-target.inc.c | ||
40 | @@ -XXX,XX +XXX,XX @@ static const char *target_parse_constraint(TCGArgConstraint *ct, | ||
41 | ct->ct |= TCG_CT_REG; | ||
42 | ct->u.regs = 0xffffffff; | ||
43 | break; | ||
44 | + case 'v': | ||
45 | + ct->ct |= TCG_CT_REG; | ||
46 | + ct->u.regs = 0xffffffff00000000ull; | ||
47 | + break; | ||
48 | case 'L': /* qemu_ld constraint */ | ||
49 | ct->ct |= TCG_CT_REG; | ||
50 | ct->u.regs = 0xffffffff; | ||
51 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
52 | |||
53 | #define NOP ORI /* ori 0,0,0 */ | ||
54 | |||
55 | +#define LVX XO31(103) | ||
56 | +#define LVEBX XO31(7) | ||
57 | +#define LVEHX XO31(39) | ||
58 | +#define LVEWX XO31(71) | ||
59 | + | ||
60 | +#define STVX XO31(231) | ||
61 | +#define STVEWX XO31(199) | ||
62 | + | ||
63 | +#define VCMPEQUB VX4(6) | ||
64 | +#define VCMPEQUH VX4(70) | ||
65 | +#define VCMPEQUW VX4(134) | ||
66 | +#define VCMPGTSB VX4(774) | ||
67 | +#define VCMPGTSH VX4(838) | ||
68 | +#define VCMPGTSW VX4(902) | ||
69 | +#define VCMPGTUB VX4(518) | ||
70 | +#define VCMPGTUH VX4(582) | ||
71 | +#define VCMPGTUW VX4(646) | ||
72 | + | ||
73 | +#define VAND VX4(1028) | ||
74 | +#define VANDC VX4(1092) | ||
75 | +#define VNOR VX4(1284) | ||
76 | +#define VOR VX4(1156) | ||
77 | +#define VXOR VX4(1220) | ||
78 | + | ||
79 | +#define VSPLTB VX4(524) | ||
80 | +#define VSPLTH VX4(588) | ||
81 | +#define VSPLTW VX4(652) | ||
82 | +#define VSPLTISB VX4(780) | ||
83 | +#define VSPLTISH VX4(844) | ||
84 | +#define VSPLTISW VX4(908) | ||
85 | + | ||
86 | +#define VSLDOI VX4(44) | ||
87 | + | ||
88 | #define RT(r) ((r)<<21) | ||
89 | #define RS(r) ((r)<<21) | ||
90 | #define RA(r) ((r)<<16) | ||
91 | @@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
92 | intptr_t value, intptr_t addend) | ||
93 | { | 28 | { |
94 | tcg_insn_unit *target; | 29 | - TCGReg temp = TCG_REG_TMP; |
95 | + int16_t lo; | 30 | + TCGReg temp = TCG_REG_TMP0; |
96 | + int32_t hi; | 31 | |
97 | 32 | if (offset < -0xffffff || offset > 0xffffff) { | |
98 | value += addend; | 33 | tcg_out_movi(s, TCG_TYPE_PTR, temp, offset); |
99 | target = (tcg_insn_unit *)value; | 34 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd, |
100 | @@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, | ||
101 | } | ||
102 | *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc); | ||
103 | break; | ||
104 | + case R_PPC_ADDR32: | ||
105 | + /* | ||
106 | + * We are abusing this relocation type. Again, this points to | ||
107 | + * a pair of insns, lis + load. This is an absolute address | ||
108 | + * relocation for PPC32 so the lis cannot be removed. | ||
109 | + */ | ||
110 | + lo = value; | ||
111 | + hi = value - lo; | ||
112 | + if (hi + lo != value) { | ||
113 | + return false; | ||
114 | + } | ||
115 | + code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16); | ||
116 | + code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo); | ||
117 | + break; | ||
118 | default: | ||
119 | g_assert_not_reached(); | ||
120 | } | 35 | } |
121 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | 36 | |
37 | /* Worst-case scenario, move offset to temp register, use reg offset. */ | ||
38 | - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); | ||
39 | - tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); | ||
40 | + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset); | ||
41 | + tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0); | ||
42 | } | ||
122 | 43 | ||
123 | static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) | 44 | static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) |
124 | { | 45 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target) |
125 | - tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); | 46 | if (offset == sextract64(offset, 0, 26)) { |
126 | - if (ret != arg) { | 47 | tcg_out_insn(s, 3206, BL, offset); |
127 | - tcg_out32(s, OR | SAB(arg, ret, arg)); | 48 | } else { |
128 | + if (ret == arg) { | 49 | - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); |
129 | + return true; | 50 | - tcg_out_insn(s, 3207, BLR, TCG_REG_TMP); |
130 | + } | 51 | + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target); |
131 | + switch (type) { | 52 | + tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0); |
132 | + case TCG_TYPE_I64: | ||
133 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
134 | + /* fallthru */ | ||
135 | + case TCG_TYPE_I32: | ||
136 | + if (ret < TCG_REG_V0 && arg < TCG_REG_V0) { | ||
137 | + tcg_out32(s, OR | SAB(arg, ret, arg)); | ||
138 | + break; | ||
139 | + } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) { | ||
140 | + /* Altivec does not support vector/integer moves. */ | ||
141 | + return false; | ||
142 | + } | ||
143 | + /* fallthru */ | ||
144 | + case TCG_TYPE_V64: | ||
145 | + case TCG_TYPE_V128: | ||
146 | + tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0); | ||
147 | + tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg)); | ||
148 | + break; | ||
149 | + default: | ||
150 | + g_assert_not_reached(); | ||
151 | } | ||
152 | return true; | ||
153 | } | ||
154 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, | ||
155 | static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, | ||
156 | tcg_target_long val) | ||
157 | { | ||
158 | - g_assert_not_reached(); | ||
159 | + uint32_t load_insn; | ||
160 | + int rel, low; | ||
161 | + intptr_t add; | ||
162 | + | ||
163 | + low = (int8_t)val; | ||
164 | + if (low >= -16 && low < 16) { | ||
165 | + if (val == (tcg_target_long)dup_const(MO_8, low)) { | ||
166 | + tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16)); | ||
167 | + return; | ||
168 | + } | ||
169 | + if (val == (tcg_target_long)dup_const(MO_16, low)) { | ||
170 | + tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16)); | ||
171 | + return; | ||
172 | + } | ||
173 | + if (val == (tcg_target_long)dup_const(MO_32, low)) { | ||
174 | + tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16)); | ||
175 | + return; | ||
176 | + } | ||
177 | + } | ||
178 | + | ||
179 | + /* | ||
180 | + * Otherwise we must load the value from the constant pool. | ||
181 | + */ | ||
182 | + if (USE_REG_TB) { | ||
183 | + rel = R_PPC_ADDR16; | ||
184 | + add = -(intptr_t)s->code_gen_ptr; | ||
185 | + } else { | ||
186 | + rel = R_PPC_ADDR32; | ||
187 | + add = 0; | ||
188 | + } | ||
189 | + | ||
190 | + load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1); | ||
191 | + if (TCG_TARGET_REG_BITS == 64) { | ||
192 | + new_pool_l2(s, rel, s->code_ptr, add, val, val); | ||
193 | + } else { | ||
194 | + new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val); | ||
195 | + } | ||
196 | + | ||
197 | + if (USE_REG_TB) { | ||
198 | + tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0)); | ||
199 | + load_insn |= RA(TCG_REG_TB); | ||
200 | + } else { | ||
201 | + tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0)); | ||
202 | + tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0)); | ||
203 | + } | ||
204 | + tcg_out32(s, load_insn); | ||
205 | } | ||
206 | |||
207 | static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, | ||
208 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | ||
209 | align = 3; | ||
210 | /* FALLTHRU */ | ||
211 | default: | ||
212 | - if (rt != TCG_REG_R0) { | ||
213 | + if (rt > TCG_REG_R0 && rt < TCG_REG_V0) { | ||
214 | rs = rt; | ||
215 | break; | ||
216 | } | ||
217 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | ||
218 | } | ||
219 | |||
220 | /* For unaligned, or very large offsets, use the indexed form. */ | ||
221 | - if (offset & align || offset != (int32_t)offset) { | ||
222 | + if (offset & align || offset != (int32_t)offset || opi == 0) { | ||
223 | if (rs == base) { | ||
224 | rs = TCG_REG_R0; | ||
225 | } | ||
226 | tcg_debug_assert(!is_store || rs != rt); | ||
227 | tcg_out_movi(s, TCG_TYPE_PTR, rs, orig); | ||
228 | - tcg_out32(s, opx | TAB(rt, base, rs)); | ||
229 | + tcg_out32(s, opx | TAB(rt & 31, base, rs)); | ||
230 | return; | ||
231 | } | ||
232 | |||
233 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt, | ||
234 | base = rs; | ||
235 | } | ||
236 | if (opi != ADDI || base != rt || l0 != 0) { | ||
237 | - tcg_out32(s, opi | TAI(rt, base, l0)); | ||
238 | + tcg_out32(s, opi | TAI(rt & 31, base, l0)); | ||
239 | } | 53 | } |
240 | } | 54 | } |
241 | 55 | ||
242 | -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | 56 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, |
243 | - TCGReg arg1, intptr_t arg2) | 57 | AArch64Insn insn; |
244 | +static void tcg_out_vsldoi(TCGContext *s, TCGReg ret, | 58 | |
245 | + TCGReg va, TCGReg vb, int shb) | 59 | if (rl == ah || (!const_bh && rl == bh)) { |
60 | - rl = TCG_REG_TMP; | ||
61 | + rl = TCG_REG_TMP0; | ||
62 | } | ||
63 | |||
64 | if (const_bl) { | ||
65 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, | ||
66 | possibility of adding 0+const in the low part, and the | ||
67 | immediate add instructions encode XSP not XZR. Don't try | ||
68 | anything more elaborate here than loading another zero. */ | ||
69 | - al = TCG_REG_TMP; | ||
70 | + al = TCG_REG_TMP0; | ||
71 | tcg_out_movi(s, ext, al, 0); | ||
72 | } | ||
73 | tcg_out_insn_3401(s, insn, ext, rl, al, bl); | ||
74 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, | ||
246 | { | 75 | { |
247 | - int opi, opx; | 76 | TCGReg a1 = a0; |
248 | - | 77 | if (is_ctz) { |
249 | - tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); | 78 | - a1 = TCG_REG_TMP; |
250 | - if (type == TCG_TYPE_I32) { | 79 | + a1 = TCG_REG_TMP0; |
251 | - opi = LWZ, opx = LWZX; | 80 | tcg_out_insn(s, 3507, RBIT, ext, a1, a0); |
252 | - } else { | 81 | } |
253 | - opi = LD, opx = LDX; | 82 | if (const_b && b == (ext ? 64 : 32)) { |
254 | - } | 83 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, |
255 | - tcg_out_mem_long(s, opi, opx, ret, arg1, arg2); | 84 | AArch64Insn sel = I3506_CSEL; |
256 | + tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6)); | 85 | |
86 | tcg_out_cmp(s, ext, a0, 0, 1); | ||
87 | - tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1); | ||
88 | + tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1); | ||
89 | |||
90 | if (const_b) { | ||
91 | if (b == -1) { | ||
92 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, | ||
93 | b = d; | ||
94 | } | ||
95 | } | ||
96 | - tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE); | ||
97 | + tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE); | ||
98 | } | ||
257 | } | 99 | } |
258 | 100 | ||
259 | -static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | 101 | @@ -XXX,XX +XXX,XX @@ bool tcg_target_has_memory_bswap(MemOp memop) |
260 | - TCGReg arg1, intptr_t arg2) | 102 | } |
261 | +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | 103 | |
262 | + TCGReg base, intptr_t offset) | 104 | static const TCGLdstHelperParam ldst_helper_param = { |
263 | { | 105 | - .ntmp = 1, .tmp = { TCG_REG_TMP } |
264 | - int opi, opx; | 106 | + .ntmp = 1, .tmp = { TCG_REG_TMP0 } |
265 | + int shift; | 107 | }; |
266 | 108 | ||
267 | - tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32); | 109 | static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) |
268 | - if (type == TCG_TYPE_I32) { | 110 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_tb(TCGContext *s, int which) |
269 | - opi = STW, opx = STWX; | 111 | |
270 | - } else { | 112 | set_jmp_insn_offset(s, which); |
271 | - opi = STD, opx = STDX; | 113 | tcg_out32(s, I3206_B); |
272 | + switch (type) { | 114 | - tcg_out_insn(s, 3207, BR, TCG_REG_TMP); |
273 | + case TCG_TYPE_I32: | 115 | + tcg_out_insn(s, 3207, BR, TCG_REG_TMP0); |
274 | + if (ret < TCG_REG_V0) { | 116 | set_jmp_reset_offset(s, which); |
275 | + tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset); | 117 | } |
276 | + break; | 118 | |
277 | + } | 119 | @@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, |
278 | + tcg_debug_assert((offset & 3) == 0); | 120 | ptrdiff_t i_offset = i_addr - jmp_rx; |
279 | + tcg_out_mem_long(s, 0, LVEWX, ret, base, offset); | 121 | |
280 | + shift = (offset - 4) & 0xc; | 122 | /* Note that we asserted this in range in tcg_out_goto_tb. */ |
281 | + if (shift) { | 123 | - insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2); |
282 | + tcg_out_vsldoi(s, ret, ret, ret, shift); | 124 | + insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2); |
283 | + } | ||
284 | + break; | ||
285 | + case TCG_TYPE_I64: | ||
286 | + if (ret < TCG_REG_V0) { | ||
287 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
288 | + tcg_out_mem_long(s, LD, LDX, ret, base, offset); | ||
289 | + break; | ||
290 | + } | ||
291 | + /* fallthru */ | ||
292 | + case TCG_TYPE_V64: | ||
293 | + tcg_debug_assert(ret >= TCG_REG_V0); | ||
294 | + tcg_debug_assert((offset & 7) == 0); | ||
295 | + tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16); | ||
296 | + if (offset & 8) { | ||
297 | + tcg_out_vsldoi(s, ret, ret, ret, 8); | ||
298 | + } | ||
299 | + break; | ||
300 | + case TCG_TYPE_V128: | ||
301 | + tcg_debug_assert(ret >= TCG_REG_V0); | ||
302 | + tcg_debug_assert((offset & 15) == 0); | ||
303 | + tcg_out_mem_long(s, 0, LVX, ret, base, offset); | ||
304 | + break; | ||
305 | + default: | ||
306 | + g_assert_not_reached(); | ||
307 | + } | ||
308 | +} | ||
309 | + | ||
310 | +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | ||
311 | + TCGReg base, intptr_t offset) | ||
312 | +{ | ||
313 | + int shift; | ||
314 | + | ||
315 | + switch (type) { | ||
316 | + case TCG_TYPE_I32: | ||
317 | + if (arg < TCG_REG_V0) { | ||
318 | + tcg_out_mem_long(s, STW, STWX, arg, base, offset); | ||
319 | + break; | ||
320 | + } | ||
321 | + tcg_debug_assert((offset & 3) == 0); | ||
322 | + shift = (offset - 4) & 0xc; | ||
323 | + if (shift) { | ||
324 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift); | ||
325 | + arg = TCG_VEC_TMP1; | ||
326 | + } | ||
327 | + tcg_out_mem_long(s, 0, STVEWX, arg, base, offset); | ||
328 | + break; | ||
329 | + case TCG_TYPE_I64: | ||
330 | + if (arg < TCG_REG_V0) { | ||
331 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
332 | + tcg_out_mem_long(s, STD, STDX, arg, base, offset); | ||
333 | + break; | ||
334 | + } | ||
335 | + /* fallthru */ | ||
336 | + case TCG_TYPE_V64: | ||
337 | + tcg_debug_assert(arg >= TCG_REG_V0); | ||
338 | + tcg_debug_assert((offset & 7) == 0); | ||
339 | + if (offset & 8) { | ||
340 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8); | ||
341 | + arg = TCG_VEC_TMP1; | ||
342 | + } | ||
343 | + tcg_out_mem_long(s, 0, STVEWX, arg, base, offset); | ||
344 | + tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4); | ||
345 | + break; | ||
346 | + case TCG_TYPE_V128: | ||
347 | + tcg_debug_assert(arg >= TCG_REG_V0); | ||
348 | + tcg_out_mem_long(s, 0, STVX, arg, base, offset); | ||
349 | + break; | ||
350 | + default: | ||
351 | + g_assert_not_reached(); | ||
352 | } | 125 | } |
353 | - tcg_out_mem_long(s, opi, opx, arg, arg1, arg2); | 126 | qatomic_set((uint32_t *)jmp_rw, insn); |
127 | flush_idcache_range(jmp_rx, jmp_rw, 4); | ||
128 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
129 | |||
130 | case INDEX_op_rem_i64: | ||
131 | case INDEX_op_rem_i32: | ||
132 | - tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2); | ||
133 | - tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); | ||
134 | + tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2); | ||
135 | + tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1); | ||
136 | break; | ||
137 | case INDEX_op_remu_i64: | ||
138 | case INDEX_op_remu_i32: | ||
139 | - tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2); | ||
140 | - tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); | ||
141 | + tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2); | ||
142 | + tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1); | ||
143 | break; | ||
144 | |||
145 | case INDEX_op_shl_i64: | ||
146 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
147 | if (c2) { | ||
148 | tcg_out_rotl(s, ext, a0, a1, a2); | ||
149 | } else { | ||
150 | - tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2); | ||
151 | - tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP); | ||
152 | + tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2); | ||
153 | + tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0); | ||
154 | } | ||
155 | break; | ||
156 | |||
157 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
158 | break; | ||
159 | } | ||
160 | } | ||
161 | - tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0); | ||
162 | - a2 = TCG_VEC_TMP; | ||
163 | + tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0); | ||
164 | + a2 = TCG_VEC_TMP0; | ||
165 | } | ||
166 | if (is_scalar) { | ||
167 | insn = cmp_scalar_insn[cond]; | ||
168 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
169 | s->reserved_regs = 0; | ||
170 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); | ||
171 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); | ||
172 | - tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); | ||
173 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ | ||
174 | - tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP); | ||
175 | + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0); | ||
176 | + tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0); | ||
354 | } | 177 | } |
355 | 178 | ||
356 | static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, | 179 | /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ |
357 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, | ||
358 | |||
359 | int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | ||
360 | { | ||
361 | - g_assert_not_reached(); | ||
362 | + switch (opc) { | ||
363 | + case INDEX_op_and_vec: | ||
364 | + case INDEX_op_or_vec: | ||
365 | + case INDEX_op_xor_vec: | ||
366 | + case INDEX_op_andc_vec: | ||
367 | + case INDEX_op_not_vec: | ||
368 | + return 1; | ||
369 | + case INDEX_op_cmp_vec: | ||
370 | + return vece <= MO_32 ? -1 : 0; | ||
371 | + default: | ||
372 | + return 0; | ||
373 | + } | ||
374 | } | ||
375 | |||
376 | static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, | ||
377 | TCGReg dst, TCGReg src) | ||
378 | { | ||
379 | - g_assert_not_reached(); | ||
380 | + tcg_debug_assert(dst >= TCG_REG_V0); | ||
381 | + tcg_debug_assert(src >= TCG_REG_V0); | ||
382 | + | ||
383 | + /* | ||
384 | + * Recall we use (or emulate) VSX integer loads, so the integer is | ||
385 | + * right justified within the left (zero-index) double-word. | ||
386 | + */ | ||
387 | + switch (vece) { | ||
388 | + case MO_8: | ||
389 | + tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16)); | ||
390 | + break; | ||
391 | + case MO_16: | ||
392 | + tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16)); | ||
393 | + break; | ||
394 | + case MO_32: | ||
395 | + tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16)); | ||
396 | + break; | ||
397 | + case MO_64: | ||
398 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8); | ||
399 | + tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8); | ||
400 | + break; | ||
401 | + default: | ||
402 | + g_assert_not_reached(); | ||
403 | + } | ||
404 | + return true; | ||
405 | } | ||
406 | |||
407 | static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, | ||
408 | TCGReg out, TCGReg base, intptr_t offset) | ||
409 | { | ||
410 | - g_assert_not_reached(); | ||
411 | + int elt; | ||
412 | + | ||
413 | + tcg_debug_assert(out >= TCG_REG_V0); | ||
414 | + switch (vece) { | ||
415 | + case MO_8: | ||
416 | + tcg_out_mem_long(s, 0, LVEBX, out, base, offset); | ||
417 | + elt = extract32(offset, 0, 4); | ||
418 | +#ifndef HOST_WORDS_BIGENDIAN | ||
419 | + elt ^= 15; | ||
420 | +#endif | ||
421 | + tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16)); | ||
422 | + break; | ||
423 | + case MO_16: | ||
424 | + tcg_debug_assert((offset & 1) == 0); | ||
425 | + tcg_out_mem_long(s, 0, LVEHX, out, base, offset); | ||
426 | + elt = extract32(offset, 1, 3); | ||
427 | +#ifndef HOST_WORDS_BIGENDIAN | ||
428 | + elt ^= 7; | ||
429 | +#endif | ||
430 | + tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16)); | ||
431 | + break; | ||
432 | + case MO_32: | ||
433 | + tcg_debug_assert((offset & 3) == 0); | ||
434 | + tcg_out_mem_long(s, 0, LVEWX, out, base, offset); | ||
435 | + elt = extract32(offset, 2, 2); | ||
436 | +#ifndef HOST_WORDS_BIGENDIAN | ||
437 | + elt ^= 3; | ||
438 | +#endif | ||
439 | + tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16)); | ||
440 | + break; | ||
441 | + case MO_64: | ||
442 | + tcg_debug_assert((offset & 7) == 0); | ||
443 | + tcg_out_mem_long(s, 0, LVX, out, base, offset & -16); | ||
444 | + tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8); | ||
445 | + elt = extract32(offset, 3, 1); | ||
446 | +#ifndef HOST_WORDS_BIGENDIAN | ||
447 | + elt = !elt; | ||
448 | +#endif | ||
449 | + if (elt) { | ||
450 | + tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8); | ||
451 | + } else { | ||
452 | + tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8); | ||
453 | + } | ||
454 | + break; | ||
455 | + default: | ||
456 | + g_assert_not_reached(); | ||
457 | + } | ||
458 | + return true; | ||
459 | } | ||
460 | |||
461 | static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
462 | unsigned vecl, unsigned vece, | ||
463 | const TCGArg *args, const int *const_args) | ||
464 | { | ||
465 | - g_assert_not_reached(); | ||
466 | + static const uint32_t | ||
467 | + eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, | ||
468 | + gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, | ||
469 | + gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }; | ||
470 | + | ||
471 | + TCGType type = vecl + TCG_TYPE_V64; | ||
472 | + TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | ||
473 | + uint32_t insn; | ||
474 | + | ||
475 | + switch (opc) { | ||
476 | + case INDEX_op_ld_vec: | ||
477 | + tcg_out_ld(s, type, a0, a1, a2); | ||
478 | + return; | ||
479 | + case INDEX_op_st_vec: | ||
480 | + tcg_out_st(s, type, a0, a1, a2); | ||
481 | + return; | ||
482 | + case INDEX_op_dupm_vec: | ||
483 | + tcg_out_dupm_vec(s, type, vece, a0, a1, a2); | ||
484 | + return; | ||
485 | + | ||
486 | + case INDEX_op_and_vec: | ||
487 | + insn = VAND; | ||
488 | + break; | ||
489 | + case INDEX_op_or_vec: | ||
490 | + insn = VOR; | ||
491 | + break; | ||
492 | + case INDEX_op_xor_vec: | ||
493 | + insn = VXOR; | ||
494 | + break; | ||
495 | + case INDEX_op_andc_vec: | ||
496 | + insn = VANDC; | ||
497 | + break; | ||
498 | + case INDEX_op_not_vec: | ||
499 | + insn = VNOR; | ||
500 | + a2 = a1; | ||
501 | + break; | ||
502 | + | ||
503 | + case INDEX_op_cmp_vec: | ||
504 | + switch (args[3]) { | ||
505 | + case TCG_COND_EQ: | ||
506 | + insn = eq_op[vece]; | ||
507 | + break; | ||
508 | + case TCG_COND_GT: | ||
509 | + insn = gts_op[vece]; | ||
510 | + break; | ||
511 | + case TCG_COND_GTU: | ||
512 | + insn = gtu_op[vece]; | ||
513 | + break; | ||
514 | + default: | ||
515 | + g_assert_not_reached(); | ||
516 | + } | ||
517 | + break; | ||
518 | + | ||
519 | + case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ | ||
520 | + case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */ | ||
521 | + case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ | ||
522 | + default: | ||
523 | + g_assert_not_reached(); | ||
524 | + } | ||
525 | + | ||
526 | + tcg_debug_assert(insn != 0); | ||
527 | + tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2)); | ||
528 | +} | ||
529 | + | ||
530 | +static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | ||
531 | + TCGv_vec v1, TCGv_vec v2, TCGCond cond) | ||
532 | +{ | ||
533 | + bool need_swap = false, need_inv = false; | ||
534 | + | ||
535 | + tcg_debug_assert(vece <= MO_32); | ||
536 | + | ||
537 | + switch (cond) { | ||
538 | + case TCG_COND_EQ: | ||
539 | + case TCG_COND_GT: | ||
540 | + case TCG_COND_GTU: | ||
541 | + break; | ||
542 | + case TCG_COND_NE: | ||
543 | + case TCG_COND_LE: | ||
544 | + case TCG_COND_LEU: | ||
545 | + need_inv = true; | ||
546 | + break; | ||
547 | + case TCG_COND_LT: | ||
548 | + case TCG_COND_LTU: | ||
549 | + need_swap = true; | ||
550 | + break; | ||
551 | + case TCG_COND_GE: | ||
552 | + case TCG_COND_GEU: | ||
553 | + need_swap = need_inv = true; | ||
554 | + break; | ||
555 | + default: | ||
556 | + g_assert_not_reached(); | ||
557 | + } | ||
558 | + | ||
559 | + if (need_inv) { | ||
560 | + cond = tcg_invert_cond(cond); | ||
561 | + } | ||
562 | + if (need_swap) { | ||
563 | + TCGv_vec t1; | ||
564 | + t1 = v1, v1 = v2, v2 = t1; | ||
565 | + cond = tcg_swap_cond(cond); | ||
566 | + } | ||
567 | + | ||
568 | + vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0), | ||
569 | + tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond); | ||
570 | + | ||
571 | + if (need_inv) { | ||
572 | + tcg_gen_not_vec(vece, v0, v0); | ||
573 | + } | ||
574 | } | ||
575 | |||
576 | void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, | ||
577 | TCGArg a0, ...) | ||
578 | { | ||
579 | - g_assert_not_reached(); | ||
580 | + va_list va; | ||
581 | + TCGv_vec v0, v1, v2; | ||
582 | + | ||
583 | + va_start(va, a0); | ||
584 | + v0 = temp_tcgv_vec(arg_temp(a0)); | ||
585 | + v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); | ||
586 | + v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); | ||
587 | + | ||
588 | + switch (opc) { | ||
589 | + case INDEX_op_cmp_vec: | ||
590 | + expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); | ||
591 | + break; | ||
592 | + default: | ||
593 | + g_assert_not_reached(); | ||
594 | + } | ||
595 | + va_end(va); | ||
596 | } | ||
597 | |||
598 | static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
599 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
600 | = { .args_ct_str = { "r", "r", "r", "r", "rI", "rZM" } }; | ||
601 | static const TCGTargetOpDef sub2 | ||
602 | = { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } }; | ||
603 | + static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } }; | ||
604 | + static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } }; | ||
605 | + static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } }; | ||
606 | |||
607 | switch (op) { | ||
608 | case INDEX_op_goto_ptr: | ||
609 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
610 | return (TCG_TARGET_REG_BITS == 64 ? &S_S | ||
611 | : TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S); | ||
612 | |||
613 | + case INDEX_op_and_vec: | ||
614 | + case INDEX_op_or_vec: | ||
615 | + case INDEX_op_xor_vec: | ||
616 | + case INDEX_op_andc_vec: | ||
617 | + case INDEX_op_orc_vec: | ||
618 | + case INDEX_op_cmp_vec: | ||
619 | + return &v_v_v; | ||
620 | + case INDEX_op_not_vec: | ||
621 | + case INDEX_op_dup_vec: | ||
622 | + return &v_v; | ||
623 | + case INDEX_op_ld_vec: | ||
624 | + case INDEX_op_st_vec: | ||
625 | + case INDEX_op_dupm_vec: | ||
626 | + return &v_r; | ||
627 | + | ||
628 | default: | ||
629 | return NULL; | ||
630 | } | ||
631 | -- | 180 | -- |
632 | 2.17.1 | 181 | 2.34.1 |
633 | |||
1 | Previously we've been hard-coding knowledge that Power7 has ISEL, but | 1 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
---|---|---|---|
2 | it was an optional instruction before that. Use the AT_HWCAP2 bit, | ||
3 | when present, to properly determine support. | ||
4 | |||
5 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | --- | 3 | --- |
8 | tcg/ppc/tcg-target.inc.c | 17 ++++++++++++----- | 4 | tcg/aarch64/tcg-target.c.inc | 9 +++++++-- |
9 | 1 file changed, 12 insertions(+), 5 deletions(-) | 5 | 1 file changed, 7 insertions(+), 2 deletions(-) |
10 | 6 | ||
11 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 7 | diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc |
12 | index XXXXXXX..XXXXXXX 100644 | 8 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/tcg/ppc/tcg-target.inc.c | 9 | --- a/tcg/aarch64/tcg-target.c.inc |
14 | +++ b/tcg/ppc/tcg-target.inc.c | 10 | +++ b/tcg/aarch64/tcg-target.c.inc |
15 | @@ -XXX,XX +XXX,XX @@ | 11 | @@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = { |
16 | static tcg_insn_unit *tb_ret_addr; | 12 | |
17 | 13 | TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, | |
18 | TCGPowerISA have_isa; | 14 | TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, |
19 | - | 15 | - TCG_REG_X16, TCG_REG_X17, |
20 | -#define HAVE_ISEL have_isa_2_06 | 16 | |
21 | +static bool have_isel; | 17 | TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, |
18 | TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, | ||
19 | |||
20 | + /* X16 reserved as temporary */ | ||
21 | + /* X17 reserved as temporary */ | ||
22 | /* X18 reserved by system */ | ||
23 | /* X19 reserved for AREG0 */ | ||
24 | /* X29 reserved as fp */ | ||
25 | @@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) | ||
26 | return TCG_REG_X0 + slot; | ||
27 | } | ||
28 | |||
29 | -#define TCG_REG_TMP0 TCG_REG_X30 | ||
30 | +#define TCG_REG_TMP0 TCG_REG_X16 | ||
31 | +#define TCG_REG_TMP1 TCG_REG_X17 | ||
32 | +#define TCG_REG_TMP2 TCG_REG_X30 | ||
33 | #define TCG_VEC_TMP0 TCG_REG_V31 | ||
22 | 34 | ||
23 | #ifndef CONFIG_SOFTMMU | 35 | #ifndef CONFIG_SOFTMMU |
24 | #define TCG_GUEST_BASE_REG 30 | ||
25 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, | ||
26 | /* If we have ISEL, we can implement everything with 3 or 4 insns. | ||
27 | All other cases below are also at least 3 insns, so speed up the | ||
28 | code generator by not considering them and always using ISEL. */ | ||
29 | - if (HAVE_ISEL) { | ||
30 | + if (have_isel) { | ||
31 | int isel, tab; | ||
32 | |||
33 | tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); | ||
34 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond, | ||
35 | |||
36 | tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type); | ||
37 | |||
38 | - if (HAVE_ISEL) { | ||
39 | + if (have_isel) { | ||
40 | int isel = tcg_to_isel[cond]; | ||
41 | |||
42 | /* Swap the V operands if the operation indicates inversion. */ | ||
43 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc, | ||
44 | } else { | ||
45 | tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type); | ||
46 | /* Note that the only other valid constant for a2 is 0. */ | ||
47 | - if (HAVE_ISEL) { | ||
48 | + if (have_isel) { | ||
49 | tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1)); | ||
50 | tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0)); | ||
51 | } else if (!const_a2 && a0 == a2) { | ||
52 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | 36 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) |
53 | } | 37 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); |
54 | #endif | 38 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ |
55 | 39 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0); | |
56 | +#ifdef PPC_FEATURE2_HAS_ISEL | 40 | + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); |
57 | + /* Prefer explicit instruction from the kernel. */ | 41 | + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2); |
58 | + have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0; | 42 | tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0); |
59 | +#else | 43 | } |
60 | + /* Fall back to knowing Power7 (2.06) has ISEL. */ | ||
61 | + have_isel = have_isa_2_06; | ||
62 | +#endif | ||
63 | + | ||
64 | tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; | ||
65 | tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff; | ||
66 | 44 | ||
67 | -- | 45 | -- |
68 | 2.17.1 | 46 | 2.34.1 |
69 | |||
1 | These new instructions are conditional only on MSR.VEC and | 1 | Adjust the softmmu tlb to use TMP[0-2], not any of the normally available |
---|---|---|---|
2 | are thus part of the Altivec instruction set, and not VSX. | 2 | registers. Since we handle overlap between inputs and helper arguments, |
3 | This includes negation and compare not equal. | 3 | we can allow any allocatable reg. |
4 | 4 | ||
5 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | 5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | --- | 7 | --- |
8 | tcg/ppc/tcg-target.h | 2 +- | 8 | tcg/aarch64/tcg-target-con-set.h | 2 -- |
9 | tcg/ppc/tcg-target.inc.c | 23 +++++++++++++++++++++++ | 9 | tcg/aarch64/tcg-target-con-str.h | 1 - |
10 | 2 files changed, 24 insertions(+), 1 deletion(-) | 10 | tcg/aarch64/tcg-target.c.inc | 45 ++++++++++++++------------------ |
11 | 3 files changed, 19 insertions(+), 29 deletions(-) | ||
11 | 12 | ||
12 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | 13 | diff --git a/tcg/aarch64/tcg-target-con-set.h b/tcg/aarch64/tcg-target-con-set.h |
13 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/tcg/ppc/tcg-target.h | 15 | --- a/tcg/aarch64/tcg-target-con-set.h |
15 | +++ b/tcg/ppc/tcg-target.h | 16 | +++ b/tcg/aarch64/tcg-target-con-set.h |
16 | @@ -XXX,XX +XXX,XX @@ extern bool have_vsx; | 17 | @@ -XXX,XX +XXX,XX @@ |
17 | #define TCG_TARGET_HAS_andc_vec 1 | 18 | * tcg-target-con-str.h; the constraint combination is inclusive or. |
18 | #define TCG_TARGET_HAS_orc_vec have_isa_2_07 | 19 | */ |
19 | #define TCG_TARGET_HAS_not_vec 1 | 20 | C_O0_I1(r) |
20 | -#define TCG_TARGET_HAS_neg_vec 0 | 21 | -C_O0_I2(lZ, l) |
21 | +#define TCG_TARGET_HAS_neg_vec have_isa_3_00 | 22 | C_O0_I2(r, rA) |
22 | #define TCG_TARGET_HAS_abs_vec 0 | 23 | C_O0_I2(rZ, r) |
23 | #define TCG_TARGET_HAS_shi_vec 0 | 24 | C_O0_I2(w, r) |
24 | #define TCG_TARGET_HAS_shs_vec 0 | 25 | -C_O1_I1(r, l) |
25 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 26 | C_O1_I1(r, r) |
27 | C_O1_I1(w, r) | ||
28 | C_O1_I1(w, w) | ||
29 | diff --git a/tcg/aarch64/tcg-target-con-str.h b/tcg/aarch64/tcg-target-con-str.h | ||
26 | index XXXXXXX..XXXXXXX 100644 | 30 | index XXXXXXX..XXXXXXX 100644 |
27 | --- a/tcg/ppc/tcg-target.inc.c | 31 | --- a/tcg/aarch64/tcg-target-con-str.h |
28 | +++ b/tcg/ppc/tcg-target.inc.c | 32 | +++ b/tcg/aarch64/tcg-target-con-str.h |
29 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 33 | @@ -XXX,XX +XXX,XX @@ |
30 | #define VSUBUWM VX4(1152) | 34 | * REGS(letter, register_mask) |
31 | #define VSUBUDM VX4(1216) /* v2.07 */ | 35 | */ |
32 | 36 | REGS('r', ALL_GENERAL_REGS) | |
33 | +#define VNEGW (VX4(1538) | (6 << 16)) /* v3.00 */ | 37 | -REGS('l', ALL_QLDST_REGS) |
34 | +#define VNEGD (VX4(1538) | (7 << 16)) /* v3.00 */ | 38 | REGS('w', ALL_VECTOR_REGS) |
35 | + | 39 | |
36 | #define VMAXSB VX4(258) | 40 | /* |
37 | #define VMAXSH VX4(322) | 41 | diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc |
38 | #define VMAXSW VX4(386) | 42 | index XXXXXXX..XXXXXXX 100644 |
39 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 43 | --- a/tcg/aarch64/tcg-target.c.inc |
40 | #define VCMPGTUH VX4(582) | 44 | +++ b/tcg/aarch64/tcg-target.c.inc |
41 | #define VCMPGTUW VX4(646) | 45 | @@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, |
42 | #define VCMPGTUD VX4(711) /* v2.07 */ | 46 | #define ALL_GENERAL_REGS 0xffffffffu |
43 | +#define VCMPNEB VX4(7) /* v3.00 */ | 47 | #define ALL_VECTOR_REGS 0xffffffff00000000ull |
44 | +#define VCMPNEH VX4(71) /* v3.00 */ | 48 | |
45 | +#define VCMPNEW VX4(135) /* v3.00 */ | 49 | -#ifdef CONFIG_SOFTMMU |
46 | 50 | -#define ALL_QLDST_REGS \ | |
47 | #define VSLB VX4(260) | 51 | - (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \ |
48 | #define VSLH VX4(324) | 52 | - (1 << TCG_REG_X2) | (1 << TCG_REG_X3))) |
49 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | 53 | -#else |
50 | case INDEX_op_shri_vec: | 54 | -#define ALL_QLDST_REGS ALL_GENERAL_REGS |
51 | case INDEX_op_sari_vec: | 55 | -#endif |
52 | return vece <= MO_32 || have_isa_2_07 ? -1 : 0; | 56 | - |
53 | + case INDEX_op_neg_vec: | 57 | /* Match a constant valid for addition (12-bit, optionally shifted). */ |
54 | + return vece >= MO_32 && have_isa_3_00; | 58 | static inline bool is_aimm(uint64_t val) |
55 | case INDEX_op_mul_vec: | 59 | { |
56 | switch (vece) { | 60 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, |
57 | case MO_8: | 61 | unsigned s_bits = opc & MO_SIZE; |
58 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | 62 | unsigned s_mask = (1u << s_bits) - 1; |
59 | static const uint32_t | 63 | unsigned mem_index = get_mmuidx(oi); |
60 | add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, | 64 | - TCGReg x3; |
61 | sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, | 65 | + TCGReg addr_adj; |
62 | + neg_op[4] = { 0, 0, VNEGW, VNEGD }, | 66 | TCGType mask_type; |
63 | eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, | 67 | uint64_t compare_mask; |
64 | + ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 }, | 68 | |
65 | gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD }, | 69 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, |
66 | gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD }, | 70 | mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32 |
67 | ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, | 71 | ? TCG_TYPE_I64 : TCG_TYPE_I32); |
68 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | 72 | |
69 | case INDEX_op_sub_vec: | 73 | - /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */ |
70 | insn = sub_op[vece]; | 74 | + /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {tmp0,tmp1}. */ |
71 | break; | 75 | QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); |
72 | + case INDEX_op_neg_vec: | 76 | QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512); |
73 | + insn = neg_op[vece]; | 77 | QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); |
74 | + a2 = a1; | 78 | QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8); |
75 | + a1 = 0; | 79 | - tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0, |
76 | + break; | 80 | + tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0, |
77 | case INDEX_op_mul_vec: | 81 | TLB_MASK_TABLE_OFS(mem_index), 1, 0); |
78 | tcg_debug_assert(vece == MO_32 && have_isa_2_07); | 82 | |
79 | insn = VMULUWM; | 83 | /* Extract the TLB index from the address into X0. */ |
80 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | 84 | tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64, |
81 | case TCG_COND_EQ: | 85 | - TCG_REG_X0, TCG_REG_X0, addr_reg, |
82 | insn = eq_op[vece]; | 86 | + TCG_REG_TMP0, TCG_REG_TMP0, addr_reg, |
83 | break; | 87 | s->page_bits - CPU_TLB_ENTRY_BITS); |
84 | + case TCG_COND_NE: | 88 | |
85 | + insn = ne_op[vece]; | 89 | - /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */ |
86 | + break; | 90 | - tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0); |
87 | case TCG_COND_GT: | 91 | + /* Add the tlb_table pointer, forming the CPUTLBEntry address in TMP1. */ |
88 | insn = gts_op[vece]; | 92 | + tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0); |
89 | break; | 93 | |
90 | @@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | 94 | - /* Load the tlb comparator into X0, and the fast path addend into X1. */ |
91 | case TCG_COND_GTU: | 95 | - tcg_out_ld(s, addr_type, TCG_REG_X0, TCG_REG_X1, |
92 | break; | 96 | + /* Load the tlb comparator into TMP0, and the fast path addend into TMP1. */ |
93 | case TCG_COND_NE: | 97 | + tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1, |
94 | + if (have_isa_3_00 && vece <= MO_32) { | 98 | is_ld ? offsetof(CPUTLBEntry, addr_read) |
95 | + break; | 99 | : offsetof(CPUTLBEntry, addr_write)); |
96 | + } | 100 | - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1, |
97 | + /* fall through */ | 101 | + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, |
98 | case TCG_COND_LE: | 102 | offsetof(CPUTLBEntry, addend)); |
99 | case TCG_COND_LEU: | 103 | |
100 | need_inv = true; | 104 | /* |
101 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | 105 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, |
102 | case INDEX_op_dup2_vec: | 106 | * cross pages using the address of the last byte of the access. |
103 | return &v_v_v; | 107 | */ |
104 | case INDEX_op_not_vec: | 108 | if (a_mask >= s_mask) { |
105 | + case INDEX_op_neg_vec: | 109 | - x3 = addr_reg; |
106 | case INDEX_op_dup_vec: | 110 | + addr_adj = addr_reg; |
107 | return &v_v; | 111 | } else { |
108 | case INDEX_op_ld_vec: | 112 | + addr_adj = TCG_REG_TMP2; |
113 | tcg_out_insn(s, 3401, ADDI, addr_type, | ||
114 | - TCG_REG_X3, addr_reg, s_mask - a_mask); | ||
115 | - x3 = TCG_REG_X3; | ||
116 | + addr_adj, addr_reg, s_mask - a_mask); | ||
117 | } | ||
118 | compare_mask = (uint64_t)s->page_mask | a_mask; | ||
119 | |||
120 | - /* Store the page mask part of the address into X3. */ | ||
121 | - tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask); | ||
122 | + /* Store the page mask part of the address into TMP2. */ | ||
123 | + tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2, | ||
124 | + addr_adj, compare_mask); | ||
125 | |||
126 | /* Perform the address comparison. */ | ||
127 | - tcg_out_cmp(s, addr_type, TCG_REG_X0, TCG_REG_X3, 0); | ||
128 | + tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0); | ||
129 | |||
130 | /* If not equal, we jump to the slow path. */ | ||
131 | ldst->label_ptr[0] = s->code_ptr; | ||
132 | tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); | ||
133 | |||
134 | - h->base = TCG_REG_X1, | ||
135 | + h->base = TCG_REG_TMP1; | ||
136 | h->index = addr_reg; | ||
137 | h->index_ext = addr_type; | ||
138 | #else | ||
139 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | ||
140 | case INDEX_op_qemu_ld_a64_i32: | ||
141 | case INDEX_op_qemu_ld_a32_i64: | ||
142 | case INDEX_op_qemu_ld_a64_i64: | ||
143 | - return C_O1_I1(r, l); | ||
144 | + return C_O1_I1(r, r); | ||
145 | case INDEX_op_qemu_st_a32_i32: | ||
146 | case INDEX_op_qemu_st_a64_i32: | ||
147 | case INDEX_op_qemu_st_a32_i64: | ||
148 | case INDEX_op_qemu_st_a64_i64: | ||
149 | - return C_O0_I2(lZ, l); | ||
150 | + return C_O0_I2(rZ, r); | ||
151 | |||
152 | case INDEX_op_deposit_i32: | ||
153 | case INDEX_op_deposit_i64: | ||
109 | -- | 154 | -- |
110 | 2.17.1 | 155 | 2.34.1 |
111 | |||
1 | For Altivec, this is always an expansion. | 1 | With FEAT_LSE2, LDP/STP suffices. Without FEAT_LSE2, use LDXP+STXP |
---|---|---|---|
2 | if 16-byte atomicity is required and LDP/STP otherwise. |
2 | 3 | ||
4 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
4 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
5 | --- | 6 | --- |
6 | tcg/ppc/tcg-target.h | 2 +- | 7 | tcg/aarch64/tcg-target-con-set.h | 2 + |
7 | tcg/ppc/tcg-target.opc.h | 8 +++ | 8 | tcg/aarch64/tcg-target.h | 11 ++- |
8 | tcg/ppc/tcg-target.inc.c | 113 ++++++++++++++++++++++++++++++++++++++- | 9 | tcg/aarch64/tcg-target.c.inc | 141 ++++++++++++++++++++++++++++++- |
9 | 3 files changed, 121 insertions(+), 2 deletions(-) | 10 | 3 files changed, 151 insertions(+), 3 deletions(-) |
10 | 11 | ||
11 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | 12 | diff --git a/tcg/aarch64/tcg-target-con-set.h b/tcg/aarch64/tcg-target-con-set.h |
12 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/tcg/ppc/tcg-target.h | 14 | --- a/tcg/aarch64/tcg-target-con-set.h |
14 | +++ b/tcg/ppc/tcg-target.h | 15 | +++ b/tcg/aarch64/tcg-target-con-set.h |
15 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | 16 | @@ -XXX,XX +XXX,XX @@ C_O0_I1(r) |
16 | #define TCG_TARGET_HAS_shs_vec 0 | 17 | C_O0_I2(r, rA) |
17 | #define TCG_TARGET_HAS_shv_vec 1 | 18 | C_O0_I2(rZ, r) |
18 | #define TCG_TARGET_HAS_cmp_vec 1 | 19 | C_O0_I2(w, r) |
19 | -#define TCG_TARGET_HAS_mul_vec 0 | 20 | +C_O0_I3(rZ, rZ, r) |
20 | +#define TCG_TARGET_HAS_mul_vec 1 | 21 | C_O1_I1(r, r) |
21 | #define TCG_TARGET_HAS_sat_vec 1 | 22 | C_O1_I1(w, r) |
22 | #define TCG_TARGET_HAS_minmax_vec 1 | 23 | C_O1_I1(w, w) |
23 | #define TCG_TARGET_HAS_bitsel_vec 0 | 24 | @@ -XXX,XX +XXX,XX @@ C_O1_I2(w, w, wO) |
24 | diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h | 25 | C_O1_I2(w, w, wZ) |
26 | C_O1_I3(w, w, w, w) | ||
27 | C_O1_I4(r, r, rA, rZ, rZ) | ||
28 | +C_O2_I1(r, r, r) | ||
29 | C_O2_I4(r, r, rZ, rZ, rA, rMZ) | ||
30 | diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h | ||
25 | index XXXXXXX..XXXXXXX 100644 | 31 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/tcg/ppc/tcg-target.opc.h | 32 | --- a/tcg/aarch64/tcg-target.h |
27 | +++ b/tcg/ppc/tcg-target.opc.h | 33 | +++ b/tcg/aarch64/tcg-target.h |
28 | @@ -XXX,XX +XXX,XX @@ | 34 | @@ -XXX,XX +XXX,XX @@ typedef enum { |
29 | * emitted by tcg_expand_vec_op. For those familiar with GCC internals, | 35 | #define TCG_TARGET_HAS_muluh_i64 1 |
30 | * consider these to be UNSPEC with names. | 36 | #define TCG_TARGET_HAS_mulsh_i64 1 |
31 | */ | 37 | |
32 | + | 38 | -#define TCG_TARGET_HAS_qemu_ldst_i128 0 |
33 | +DEF(ppc_mrgh_vec, 1, 2, 0, IMPLVEC) | 39 | +/* |
34 | +DEF(ppc_mrgl_vec, 1, 2, 0, IMPLVEC) | 40 | + * Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load, |
35 | +DEF(ppc_msum_vec, 1, 3, 0, IMPLVEC) | 41 | + * which requires writable pages. We must defer to the helper for user-only, |
36 | +DEF(ppc_muleu_vec, 1, 2, 0, IMPLVEC) | 42 | + * but in system mode all ram is writable for the host. |
37 | +DEF(ppc_mulou_vec, 1, 2, 0, IMPLVEC) | 43 | + */ |
38 | +DEF(ppc_pkum_vec, 1, 2, 0, IMPLVEC) | 44 | +#ifdef CONFIG_USER_ONLY |
39 | +DEF(ppc_rotl_vec, 1, 2, 0, IMPLVEC) | 45 | +#define TCG_TARGET_HAS_qemu_ldst_i128 have_lse2 |
40 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 46 | +#else |
47 | +#define TCG_TARGET_HAS_qemu_ldst_i128 1 | ||
48 | +#endif | ||
49 | |||
50 | #define TCG_TARGET_HAS_v64 1 | ||
51 | #define TCG_TARGET_HAS_v128 1 | ||
52 | diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc | ||
41 | index XXXXXXX..XXXXXXX 100644 | 53 | index XXXXXXX..XXXXXXX 100644 |
42 | --- a/tcg/ppc/tcg-target.inc.c | 54 | --- a/tcg/aarch64/tcg-target.c.inc |
43 | +++ b/tcg/ppc/tcg-target.inc.c | 55 | +++ b/tcg/aarch64/tcg-target.c.inc |
44 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 56 | @@ -XXX,XX +XXX,XX @@ typedef enum { |
45 | #define VSRAB VX4(772) | 57 | I3305_LDR_v64 = 0x5c000000, |
46 | #define VSRAH VX4(836) | 58 | I3305_LDR_v128 = 0x9c000000, |
47 | #define VSRAW VX4(900) | 59 | |
48 | +#define VRLB VX4(4) | 60 | + /* Load/store exclusive. */ |
49 | +#define VRLH VX4(68) | 61 | + I3306_LDXP = 0xc8600000, |
50 | +#define VRLW VX4(132) | 62 | + I3306_STXP = 0xc8200000, |
51 | + | 63 | + |
52 | +#define VMULEUB VX4(520) | 64 | /* Load/store register. Described here as 3.3.12, but the helper |
53 | +#define VMULEUH VX4(584) | 65 | that emits them can transform to 3.3.10 or 3.3.13. */ |
54 | +#define VMULOUB VX4(8) | 66 | I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, |
55 | +#define VMULOUH VX4(72) | 67 | @@ -XXX,XX +XXX,XX @@ typedef enum { |
56 | +#define VMSUMUHM VX4(38) | 68 | I3406_ADR = 0x10000000, |
57 | + | 69 | I3406_ADRP = 0x90000000, |
58 | +#define VMRGHB VX4(12) | 70 | |
59 | +#define VMRGHH VX4(76) | 71 | + /* Add/subtract extended register instructions. */ |
60 | +#define VMRGHW VX4(140) | 72 | + I3501_ADD = 0x0b200000, |
61 | +#define VMRGLB VX4(268) | 73 | + |
62 | +#define VMRGLH VX4(332) | 74 | /* Add/subtract shifted register instructions (without a shift). */ |
63 | +#define VMRGLW VX4(396) | 75 | I3502_ADD = 0x0b000000, |
64 | + | 76 | I3502_ADDS = 0x2b000000, |
65 | +#define VPKUHUM VX4(14) | 77 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, |
66 | +#define VPKUWUM VX4(78) | 78 | tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt); |
67 | 79 | } | |
68 | #define VAND VX4(1028) | 80 | |
69 | #define VANDC VX4(1092) | 81 | +static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs, |
70 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | 82 | + TCGReg rt, TCGReg rt2, TCGReg rn) |
71 | case INDEX_op_sarv_vec: | 83 | +{ |
72 | return vece <= MO_32; | 84 | + tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt); |
73 | case INDEX_op_cmp_vec: | 85 | +} |
74 | + case INDEX_op_mul_vec: | 86 | + |
75 | case INDEX_op_shli_vec: | 87 | static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, |
76 | case INDEX_op_shri_vec: | 88 | TCGReg rt, int imm19) |
77 | case INDEX_op_sari_vec: | 89 | { |
78 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | 90 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, |
79 | smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }, | 91 | tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); |
80 | shlv_op[4] = { VSLB, VSLH, VSLW, 0 }, | 92 | } |
81 | shrv_op[4] = { VSRB, VSRH, VSRW, 0 }, | 93 | |
82 | - sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 }; | 94 | +static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn, |
83 | + sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 }, | 95 | + TCGType sf, TCGReg rd, TCGReg rn, |
84 | + mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 }, | 96 | + TCGReg rm, int opt, int imm3) |
85 | + mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 }, | 97 | +{ |
86 | + muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 }, | 98 | + tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 | |
87 | + mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 }, | 99 | + imm3 << 10 | rn << 5 | rd); |
88 | + pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 }, | 100 | +} |
89 | + rotl_op[4] = { VRLB, VRLH, VRLW, 0 }; | 101 | + |
90 | 102 | /* This function is for both 3.5.2 (Add/Subtract shifted register), for | |
91 | TCGType type = vecl + TCG_TYPE_V64; | 103 | the rare occasion when we actually want to supply a shift amount. */ |
92 | TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | 104 | static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn, |
93 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | 105 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, |
94 | } | 106 | TCGType addr_type = s->addr_type; |
95 | break; | 107 | TCGLabelQemuLdst *ldst = NULL; |
96 | 108 | MemOp opc = get_memop(oi); | |
97 | + case INDEX_op_ppc_mrgh_vec: | 109 | + MemOp s_bits = opc & MO_SIZE; |
98 | + insn = mrgh_op[vece]; | 110 | unsigned a_mask; |
99 | + break; | 111 | |
100 | + case INDEX_op_ppc_mrgl_vec: | 112 | h->aa = atom_and_align_for_opc(s, opc, |
101 | + insn = mrgl_op[vece]; | 113 | have_lse2 ? MO_ATOM_WITHIN16 |
102 | + break; | 114 | : MO_ATOM_IFALIGN, |
103 | + case INDEX_op_ppc_muleu_vec: | 115 | - false); |
104 | + insn = muleu_op[vece]; | 116 | + s_bits == MO_128); |
105 | + break; | 117 | a_mask = (1 << h->aa.align) - 1; |
106 | + case INDEX_op_ppc_mulou_vec: | 118 | |
107 | + insn = mulou_op[vece]; | 119 | #ifdef CONFIG_SOFTMMU |
108 | + break; | 120 | - unsigned s_bits = opc & MO_SIZE; |
109 | + case INDEX_op_ppc_pkum_vec: | 121 | unsigned s_mask = (1u << s_bits) - 1; |
110 | + insn = pkum_op[vece]; | 122 | unsigned mem_index = get_mmuidx(oi); |
111 | + break; | 123 | TCGReg addr_adj; |
112 | + case INDEX_op_ppc_rotl_vec: | 124 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, |
113 | + insn = rotl_op[vece]; | ||
114 | + break; | ||
115 | + case INDEX_op_ppc_msum_vec: | ||
116 | + tcg_debug_assert(vece == MO_16); | ||
117 | + tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3])); | ||
118 | + return; | ||
119 | + | ||
120 | case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */ | ||
121 | case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */ | ||
122 | case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */ | ||
123 | @@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | ||
124 | } | 125 | } |
125 | } | 126 | } |
126 | 127 | ||
127 | +static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0, | 128 | +static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, |
128 | + TCGv_vec v1, TCGv_vec v2) | 129 | + TCGReg addr_reg, MemOpIdx oi, bool is_ld) |
129 | +{ | 130 | +{ |
130 | + TCGv_vec t1 = tcg_temp_new_vec(type); | 131 | + TCGLabelQemuLdst *ldst; |
131 | + TCGv_vec t2 = tcg_temp_new_vec(type); | 132 | + HostAddress h; |
132 | + TCGv_vec t3, t4; | 133 | + TCGReg base; |
133 | + | 134 | + bool use_pair; |
134 | + switch (vece) { | 135 | + |
135 | + case MO_8: | 136 | + ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld); |
136 | + case MO_16: | 137 | + |
137 | + vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1), | 138 | + /* Compose the final address, as LDP/STP have no indexing. */ |
138 | + tcgv_vec_arg(v1), tcgv_vec_arg(v2)); | 139 | + if (h.index == TCG_REG_XZR) { |
139 | + vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2), | 140 | + base = h.base; |
140 | + tcgv_vec_arg(v1), tcgv_vec_arg(v2)); | 141 | + } else { |
141 | + vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0), | 142 | + base = TCG_REG_TMP2; |
142 | + tcgv_vec_arg(t1), tcgv_vec_arg(t2)); | 143 | + if (h.index_ext == TCG_TYPE_I32) { |
143 | + vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1), | 144 | + /* add base, base, index, uxtw */ |
144 | + tcgv_vec_arg(t1), tcgv_vec_arg(t2)); | 145 | + tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base, |
145 | + vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0), | 146 | + h.base, h.index, MO_32, 0); |
146 | + tcgv_vec_arg(v0), tcgv_vec_arg(t1)); | 147 | + } else { |
147 | + break; | 148 | + /* add base, base, index */ |
148 | + | 149 | + tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index); |
149 | + case MO_32: | 150 | + } |
150 | + t3 = tcg_temp_new_vec(type); | 151 | + } |
151 | + t4 = tcg_temp_new_vec(type); | 152 | + |
152 | + tcg_gen_dupi_vec(MO_8, t4, -16); | 153 | + use_pair = h.aa.atom < MO_128 || have_lse2; |
153 | + vec_gen_3(INDEX_op_ppc_rotl_vec, type, MO_32, tcgv_vec_arg(t1), | 154 | + |
154 | + tcgv_vec_arg(v2), tcgv_vec_arg(t4)); | 155 | + if (!use_pair) { |
155 | + vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2), | 156 | + tcg_insn_unit *branch = NULL; |
156 | + tcgv_vec_arg(v1), tcgv_vec_arg(v2)); | 157 | + TCGReg ll, lh, sl, sh; |
157 | + tcg_gen_dupi_vec(MO_8, t3, 0); | 158 | + |
158 | + vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t3), | 159 | + /* |
159 | + tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(t3)); | 160 | + * If we have already checked for 16-byte alignment, that's all |
160 | + vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t3), | 161 | + * we need. Otherwise we have determined that misaligned atomicity |
161 | + tcgv_vec_arg(t3), tcgv_vec_arg(t4)); | 162 | + * may be handled with two 8-byte loads. |
162 | + tcg_gen_add_vec(MO_32, v0, t2, t3); | 163 | + */ |
163 | + tcg_temp_free_vec(t3); | 164 | + if (h.aa.align < MO_128) { |
164 | + tcg_temp_free_vec(t4); | 165 | + /* |
166 | + * TODO: align should be MO_64, so we only need test bit 3, | ||
167 | + * which means we could use TBNZ instead of ANDS+B_C. | ||
168 | + */ | ||
169 | + tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15); | ||
170 | + branch = s->code_ptr; | ||
171 | + tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); | ||
172 | + use_pair = true; | ||
173 | + } | ||
174 | + | ||
175 | + if (is_ld) { | ||
176 | + /* | ||
177 | + * 16-byte atomicity without LSE2 requires LDXP+STXP loop: | ||
178 | + * ldxp lo, hi, [base] | ||
179 | + * stxp t0, lo, hi, [base] | ||
180 | + * cbnz t0, .-8 | ||
181 | + * Require no overlap between data{lo,hi} and base. | ||
182 | + */ | ||
183 | + if (datalo == base || datahi == base) { | ||
184 | + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base); | ||
185 | + base = TCG_REG_TMP2; | ||
186 | + } | ||
187 | + ll = sl = datalo; | ||
188 | + lh = sh = datahi; | ||
189 | + } else { | ||
190 | + /* | ||
191 | + * 16-byte atomicity without LSE2 requires LDXP+STXP loop: | ||
192 | + * 1: ldxp t0, t1, [base] | ||
193 | + * stxp t0, lo, hi, [base] | ||
194 | + * cbnz t0, 1b | ||
195 | + */ | ||
196 | + tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1); | ||
197 | + ll = TCG_REG_TMP0; | ||
198 | + lh = TCG_REG_TMP1; | ||
199 | + sl = datalo; | ||
200 | + sh = datahi; | ||
201 | + } | ||
202 | + | ||
203 | + tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base); | ||
204 | + tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base); | ||
205 | + tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2); | ||
206 | + | ||
207 | + if (use_pair) { | ||
208 | + /* "b .+8", branching across the one insn of use_pair. */ | ||
209 | + tcg_out_insn(s, 3206, B, 2); | ||
210 | + reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr)); | ||
211 | + } | ||
212 | + } | ||
213 | + | ||
214 | + if (use_pair) { | ||
215 | + if (is_ld) { | ||
216 | + tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0); | ||
217 | + } else { | ||
218 | + tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0); | ||
219 | + } | ||
220 | + } | ||
221 | + | ||
222 | + if (ldst) { | ||
223 | + ldst->type = TCG_TYPE_I128; | ||
224 | + ldst->datalo_reg = datalo; | ||
225 | + ldst->datahi_reg = datahi; | ||
226 | + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); | ||
227 | + } | ||
228 | +} | ||
229 | + | ||
230 | static const tcg_insn_unit *tb_ret_addr; | ||
231 | |||
232 | static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) | ||
233 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
234 | case INDEX_op_qemu_st_a64_i64: | ||
235 | tcg_out_qemu_st(s, REG0(0), a1, a2, ext); | ||
236 | break; | ||
237 | + case INDEX_op_qemu_ld_a32_i128: | ||
238 | + case INDEX_op_qemu_ld_a64_i128: | ||
239 | + tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true); | ||
165 | + break; | 240 | + break; |
166 | + | 241 | + case INDEX_op_qemu_st_a32_i128: |
167 | + default: | 242 | + case INDEX_op_qemu_st_a64_i128: |
168 | + g_assert_not_reached(); | 243 | + tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false); |
169 | + } | ||
170 | + tcg_temp_free_vec(t1); | ||
171 | + tcg_temp_free_vec(t2); | ||
172 | +} | ||
173 | + | ||
174 | void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, | ||
175 | TCGArg a0, ...) | ||
176 | { | ||
177 | @@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, | ||
178 | v2 = temp_tcgv_vec(arg_temp(a2)); | ||
179 | expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); | ||
180 | break; | ||
181 | + case INDEX_op_mul_vec: | ||
182 | + v2 = temp_tcgv_vec(arg_temp(a2)); | ||
183 | + expand_vec_mul(type, vece, v0, v1, v2); | ||
184 | + break; | 244 | + break; |
185 | default: | 245 | |
186 | g_assert_not_reached(); | 246 | case INDEX_op_bswap64_i64: |
187 | } | 247 | tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1); |
188 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | 248 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) |
189 | static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } }; | 249 | case INDEX_op_qemu_ld_a32_i64: |
190 | static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } }; | 250 | case INDEX_op_qemu_ld_a64_i64: |
191 | static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } }; | 251 | return C_O1_I1(r, r); |
192 | + static const TCGTargetOpDef v_v_v_v | 252 | + case INDEX_op_qemu_ld_a32_i128: |
193 | + = { .args_ct_str = { "v", "v", "v", "v" } }; | 253 | + case INDEX_op_qemu_ld_a64_i128: |
194 | 254 | + return C_O2_I1(r, r, r); | |
195 | switch (op) { | 255 | case INDEX_op_qemu_st_a32_i32: |
196 | case INDEX_op_goto_ptr: | 256 | case INDEX_op_qemu_st_a64_i32: |
197 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | 257 | case INDEX_op_qemu_st_a32_i64: |
198 | 258 | case INDEX_op_qemu_st_a64_i64: | |
199 | case INDEX_op_add_vec: | 259 | return C_O0_I2(rZ, r); |
200 | case INDEX_op_sub_vec: | 260 | + case INDEX_op_qemu_st_a32_i128: |
201 | + case INDEX_op_mul_vec: | 261 | + case INDEX_op_qemu_st_a64_i128: |
202 | case INDEX_op_and_vec: | 262 | + return C_O0_I3(rZ, rZ, r); |
203 | case INDEX_op_or_vec: | 263 | |
204 | case INDEX_op_xor_vec: | 264 | case INDEX_op_deposit_i32: |
205 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | 265 | case INDEX_op_deposit_i64: |
206 | case INDEX_op_shlv_vec: | ||
207 | case INDEX_op_shrv_vec: | ||
208 | case INDEX_op_sarv_vec: | ||
209 | + case INDEX_op_ppc_mrgh_vec: | ||
210 | + case INDEX_op_ppc_mrgl_vec: | ||
211 | + case INDEX_op_ppc_muleu_vec: | ||
212 | + case INDEX_op_ppc_mulou_vec: | ||
213 | + case INDEX_op_ppc_pkum_vec: | ||
214 | + case INDEX_op_ppc_rotl_vec: | ||
215 | return &v_v_v; | ||
216 | case INDEX_op_not_vec: | ||
217 | case INDEX_op_dup_vec: | ||
218 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
219 | case INDEX_op_st_vec: | ||
220 | case INDEX_op_dupm_vec: | ||
221 | return &v_r; | ||
222 | + case INDEX_op_ppc_msum_vec: | ||
223 | + return &v_v_v_v; | ||
224 | |||
225 | default: | ||
226 | return NULL; | ||
227 | -- | 266 | -- |
228 | 2.17.1 | 267 | 2.34.1 |
229 | |||
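As a rough illustration of the aarch64 fallback in the patch above (a hedged sketch, not part of either series): without FEAT_LSE2 the backend emits "ldxp lo, hi; stxp t0, lo, hi; cbnz t0, .-8", i.e. a 16-byte read that only completes once storing the same value back succeeds. In C, assuming a compiler/libatomic that provides a 16-byte compare-and-swap, the equivalent idiom is:

    typedef unsigned __int128 u128;

    /* Atomic 16-byte read modelled as compare-and-swap-with-self. */
    static inline u128 model_ld16_atomic(u128 *p)
    {
        u128 old = 0;
        while (!__atomic_compare_exchange_n(p, &old, old, false,
                                            __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
            /* on failure 'old' is refreshed with the current contents */
        }
        return old;
    }

The straight LDP/STP pair is used instead whenever only 8-byte atomicity is needed for the access.
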
1 | These new instructions are conditional only on MSR.VEC and | 1 | Use LQ/STQ, available from ISA v2.07, when 16-byte atomicity is required. |
---|---|---|---|
2 | are thus part of the Altivec instruction set, and not VSX. | 2 | Note that these instructions do not require 16-byte alignment. |
3 | This includes lots of double-word arithmetic and a few extra | ||
4 | logical operations. | ||
5 | 3 | ||
6 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | 4 | Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com> |
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | --- | 6 | --- |
9 | tcg/ppc/tcg-target.h | 4 +- | 7 | tcg/ppc/tcg-target-con-set.h | 2 + |
10 | tcg/ppc/tcg-target.inc.c | 85 ++++++++++++++++++++++++++++++---------- | 8 | tcg/ppc/tcg-target-con-str.h | 1 + |
11 | 2 files changed, 67 insertions(+), 22 deletions(-) | 9 | tcg/ppc/tcg-target.h | 3 +- |
10 | tcg/ppc/tcg-target.c.inc | 108 +++++++++++++++++++++++++++++++---- | ||
11 | 4 files changed, 101 insertions(+), 13 deletions(-) | ||
12 | 12 | ||
13 | diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/tcg/ppc/tcg-target-con-set.h | ||
16 | +++ b/tcg/ppc/tcg-target-con-set.h | ||
17 | @@ -XXX,XX +XXX,XX @@ C_O0_I2(r, r) | ||
18 | C_O0_I2(r, ri) | ||
19 | C_O0_I2(v, r) | ||
20 | C_O0_I3(r, r, r) | ||
21 | +C_O0_I3(o, m, r) | ||
22 | C_O0_I4(r, r, ri, ri) | ||
23 | C_O0_I4(r, r, r, r) | ||
24 | C_O1_I1(r, r) | ||
25 | @@ -XXX,XX +XXX,XX @@ C_O1_I3(v, v, v, v) | ||
26 | C_O1_I4(r, r, ri, rZ, rZ) | ||
27 | C_O1_I4(r, r, r, ri, ri) | ||
28 | C_O2_I1(r, r, r) | ||
29 | +C_O2_I1(o, m, r) | ||
30 | C_O2_I2(r, r, r, r) | ||
31 | C_O2_I4(r, r, rI, rZM, r, r) | ||
32 | C_O2_I4(r, r, r, r, rI, rZM) | ||
33 | diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/tcg/ppc/tcg-target-con-str.h | ||
36 | +++ b/tcg/ppc/tcg-target-con-str.h | ||
37 | @@ -XXX,XX +XXX,XX @@ | ||
38 | * REGS(letter, register_mask) | ||
39 | */ | ||
40 | REGS('r', ALL_GENERAL_REGS) | ||
41 | +REGS('o', ALL_GENERAL_REGS & 0xAAAAAAAAu) /* odd registers */ | ||
42 | REGS('v', ALL_VECTOR_REGS) | ||
43 | |||
44 | /* | ||
13 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | 45 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h |
14 | index XXXXXXX..XXXXXXX 100644 | 46 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/tcg/ppc/tcg-target.h | 47 | --- a/tcg/ppc/tcg-target.h |
16 | +++ b/tcg/ppc/tcg-target.h | 48 | +++ b/tcg/ppc/tcg-target.h |
17 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
18 | typedef enum { | ||
19 | tcg_isa_base, | ||
20 | tcg_isa_2_06, | ||
21 | + tcg_isa_2_07, | ||
22 | tcg_isa_3_00, | ||
23 | } TCGPowerISA; | ||
24 | |||
25 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | ||
26 | extern bool have_vsx; | ||
27 | |||
28 | #define have_isa_2_06 (have_isa >= tcg_isa_2_06) | ||
29 | +#define have_isa_2_07 (have_isa >= tcg_isa_2_07) | ||
30 | #define have_isa_3_00 (have_isa >= tcg_isa_3_00) | ||
31 | |||
32 | /* optional instructions automatically implemented */ | ||
33 | @@ -XXX,XX +XXX,XX @@ extern bool have_vsx; | 49 | @@ -XXX,XX +XXX,XX @@ extern bool have_vsx; |
34 | #define TCG_TARGET_HAS_v256 0 | 50 | #define TCG_TARGET_HAS_mulsh_i64 1 |
35 | 51 | #endif | |
36 | #define TCG_TARGET_HAS_andc_vec 1 | 52 | |
37 | -#define TCG_TARGET_HAS_orc_vec 0 | 53 | -#define TCG_TARGET_HAS_qemu_ldst_i128 0 |
38 | +#define TCG_TARGET_HAS_orc_vec have_isa_2_07 | 54 | +#define TCG_TARGET_HAS_qemu_ldst_i128 \ |
39 | #define TCG_TARGET_HAS_not_vec 1 | 55 | + (TCG_TARGET_REG_BITS == 64 && have_isa_2_07) |
40 | #define TCG_TARGET_HAS_neg_vec 0 | 56 | |
41 | #define TCG_TARGET_HAS_abs_vec 0 | 57 | /* |
42 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 58 | * While technically Altivec could support V64, it has no 64-bit store |
43 | index XXXXXXX..XXXXXXX 100644 | 59 | diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc |
44 | --- a/tcg/ppc/tcg-target.inc.c | 60 | index XXXXXXX..XXXXXXX 100644 |
45 | +++ b/tcg/ppc/tcg-target.inc.c | 61 | --- a/tcg/ppc/tcg-target.c.inc |
46 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 62 | +++ b/tcg/ppc/tcg-target.c.inc |
47 | #define VADDSWS VX4(896) | 63 | @@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) |
48 | #define VADDUWS VX4(640) | 64 | |
49 | #define VADDUWM VX4(128) | 65 | #define B OPCD( 18) |
50 | +#define VADDUDM VX4(192) /* v2.07 */ | 66 | #define BC OPCD( 16) |
51 | 67 | + | |
52 | #define VSUBSBS VX4(1792) | 68 | #define LBZ OPCD( 34) |
53 | #define VSUBUBS VX4(1536) | 69 | #define LHZ OPCD( 40) |
54 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 70 | #define LHA OPCD( 42) |
55 | #define VSUBSWS VX4(1920) | 71 | #define LWZ OPCD( 32) |
56 | #define VSUBUWS VX4(1664) | 72 | #define LWZUX XO31( 55) |
57 | #define VSUBUWM VX4(1152) | 73 | -#define STB OPCD( 38) |
58 | +#define VSUBUDM VX4(1216) /* v2.07 */ | 74 | -#define STH OPCD( 44) |
59 | 75 | -#define STW OPCD( 36) | |
60 | #define VMAXSB VX4(258) | 76 | - |
61 | #define VMAXSH VX4(322) | 77 | -#define STD XO62( 0) |
62 | #define VMAXSW VX4(386) | 78 | -#define STDU XO62( 1) |
63 | +#define VMAXSD VX4(450) /* v2.07 */ | 79 | -#define STDX XO31(149) |
64 | #define VMAXUB VX4(2) | 80 | - |
65 | #define VMAXUH VX4(66) | 81 | #define LD XO58( 0) |
66 | #define VMAXUW VX4(130) | 82 | #define LDX XO31( 21) |
67 | +#define VMAXUD VX4(194) /* v2.07 */ | 83 | #define LDU XO58( 1) |
68 | #define VMINSB VX4(770) | 84 | #define LDUX XO31( 53) |
69 | #define VMINSH VX4(834) | 85 | #define LWA XO58( 2) |
70 | #define VMINSW VX4(898) | 86 | #define LWAX XO31(341) |
71 | +#define VMINSD VX4(962) /* v2.07 */ | 87 | +#define LQ OPCD( 56) |
72 | #define VMINUB VX4(514) | 88 | + |
73 | #define VMINUH VX4(578) | 89 | +#define STB OPCD( 38) |
74 | #define VMINUW VX4(642) | 90 | +#define STH OPCD( 44) |
75 | +#define VMINUD VX4(706) /* v2.07 */ | 91 | +#define STW OPCD( 36) |
76 | 92 | +#define STD XO62( 0) | |
77 | #define VCMPEQUB VX4(6) | 93 | +#define STDU XO62( 1) |
78 | #define VCMPEQUH VX4(70) | 94 | +#define STDX XO31(149) |
79 | #define VCMPEQUW VX4(134) | 95 | +#define STQ XO62( 2) |
80 | +#define VCMPEQUD VX4(199) /* v2.07 */ | 96 | |
81 | #define VCMPGTSB VX4(774) | 97 | #define ADDIC OPCD( 12) |
82 | #define VCMPGTSH VX4(838) | 98 | #define ADDI OPCD( 14) |
83 | #define VCMPGTSW VX4(902) | 99 | @@ -XXX,XX +XXX,XX @@ typedef struct { |
84 | +#define VCMPGTSD VX4(967) /* v2.07 */ | 100 | |
85 | #define VCMPGTUB VX4(518) | 101 | bool tcg_target_has_memory_bswap(MemOp memop) |
86 | #define VCMPGTUH VX4(582) | 102 | { |
87 | #define VCMPGTUW VX4(646) | 103 | - return true; |
88 | +#define VCMPGTUD VX4(711) /* v2.07 */ | 104 | + TCGAtomAlign aa; |
89 | 105 | + | |
90 | #define VSLB VX4(260) | 106 | + if ((memop & MO_SIZE) <= MO_64) { |
91 | #define VSLH VX4(324) | 107 | + return true; |
92 | #define VSLW VX4(388) | 108 | + } |
93 | +#define VSLD VX4(1476) /* v2.07 */ | 109 | + |
94 | #define VSRB VX4(516) | 110 | + /* |
95 | #define VSRH VX4(580) | 111 | + * Reject 16-byte memop with 16-byte atomicity, |
96 | #define VSRW VX4(644) | 112 | + * but do allow a pair of 64-bit operations. |
97 | +#define VSRD VX4(1732) /* v2.07 */ | 113 | + */ |
98 | #define VSRAB VX4(772) | 114 | + aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true); |
99 | #define VSRAH VX4(836) | 115 | + return aa.atom <= MO_64; |
100 | #define VSRAW VX4(900) | 116 | } |
101 | +#define VSRAD VX4(964) /* v2.07 */ | 117 | |
102 | #define VRLB VX4(4) | 118 | /* |
103 | #define VRLH VX4(68) | 119 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, |
104 | #define VRLW VX4(132) | 120 | { |
105 | +#define VRLD VX4(196) /* v2.07 */ | 121 | TCGLabelQemuLdst *ldst = NULL; |
106 | 122 | MemOp opc = get_memop(oi); | |
107 | #define VMULEUB VX4(520) | 123 | - MemOp a_bits; |
108 | #define VMULEUH VX4(584) | 124 | + MemOp a_bits, s_bits; |
109 | +#define VMULEUW VX4(648) /* v2.07 */ | 125 | |
110 | #define VMULOUB VX4(8) | 126 | /* |
111 | #define VMULOUH VX4(72) | 127 | * Book II, Section 1.4, Single-Copy Atomicity, specifies: |
112 | +#define VMULOUW VX4(136) /* v2.07 */ | 128 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, |
113 | +#define VMULUWM VX4(137) /* v2.07 */ | 129 | * As of 3.0, "the non-atomic access is performed as described in |
114 | #define VMSUMUHM VX4(38) | 130 | * the corresponding list", which matches MO_ATOM_SUBALIGN. |
115 | 131 | */ | |
116 | #define VMRGHB VX4(12) | 132 | + s_bits = opc & MO_SIZE; |
117 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 133 | h->aa = atom_and_align_for_opc(s, opc, |
118 | #define VNOR VX4(1284) | 134 | have_isa_3_00 ? MO_ATOM_SUBALIGN |
119 | #define VOR VX4(1156) | 135 | : MO_ATOM_IFALIGN, |
120 | #define VXOR VX4(1220) | 136 | - false); |
121 | +#define VEQV VX4(1668) /* v2.07 */ | 137 | + s_bits == MO_128); |
122 | +#define VNAND VX4(1412) /* v2.07 */ | 138 | a_bits = h->aa.align; |
123 | +#define VORC VX4(1348) /* v2.07 */ | 139 | |
124 | 140 | #ifdef CONFIG_SOFTMMU | |
125 | #define VSPLTB VX4(524) | 141 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, |
126 | #define VSPLTH VX4(588) | 142 | int fast_off = TLB_MASK_TABLE_OFS(mem_index); |
127 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | 143 | int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); |
128 | case INDEX_op_andc_vec: | 144 | int table_off = fast_off + offsetof(CPUTLBDescFast, table); |
129 | case INDEX_op_not_vec: | 145 | - unsigned s_bits = opc & MO_SIZE; |
130 | return 1; | 146 | |
131 | + case INDEX_op_orc_vec: | 147 | ldst = new_ldst_label(s); |
132 | + return have_isa_2_07; | 148 | ldst->is_ld = is_ld; |
149 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, | ||
150 | } | ||
151 | } | ||
152 | |||
153 | +static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, | ||
154 | + TCGReg addr_reg, MemOpIdx oi, bool is_ld) | ||
155 | +{ | ||
156 | + TCGLabelQemuLdst *ldst; | ||
157 | + HostAddress h; | ||
158 | + bool need_bswap; | ||
159 | + uint32_t insn; | ||
160 | + TCGReg index; | ||
161 | + | ||
162 | + ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld); | ||
163 | + | ||
164 | + /* Compose the final address, as LQ/STQ have no indexing. */ | ||
165 | + index = h.index; | ||
166 | + if (h.base != 0) { | ||
167 | + index = TCG_REG_TMP1; | ||
168 | + tcg_out32(s, ADD | TAB(index, h.base, h.index)); | ||
169 | + } | ||
170 | + need_bswap = get_memop(oi) & MO_BSWAP; | ||
171 | + | ||
172 | + if (h.aa.atom == MO_128) { | ||
173 | + tcg_debug_assert(!need_bswap); | ||
174 | + tcg_debug_assert(datalo & 1); | ||
175 | + tcg_debug_assert(datahi == datalo - 1); | ||
176 | + insn = is_ld ? LQ : STQ; | ||
177 | + tcg_out32(s, insn | TAI(datahi, index, 0)); | ||
178 | + } else { | ||
179 | + TCGReg d1, d2; | ||
180 | + | ||
181 | + if (HOST_BIG_ENDIAN ^ need_bswap) { | ||
182 | + d1 = datahi, d2 = datalo; | ||
183 | + } else { | ||
184 | + d1 = datalo, d2 = datahi; | ||
185 | + } | ||
186 | + | ||
187 | + if (need_bswap) { | ||
188 | + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8); | ||
189 | + insn = is_ld ? LDBRX : STDBRX; | ||
190 | + tcg_out32(s, insn | TAB(d1, 0, index)); | ||
191 | + tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0)); | ||
192 | + } else { | ||
193 | + insn = is_ld ? LD : STD; | ||
194 | + tcg_out32(s, insn | TAI(d1, index, 0)); | ||
195 | + tcg_out32(s, insn | TAI(d2, index, 8)); | ||
196 | + } | ||
197 | + } | ||
198 | + | ||
199 | + if (ldst) { | ||
200 | + ldst->type = TCG_TYPE_I128; | ||
201 | + ldst->datalo_reg = datalo; | ||
202 | + ldst->datahi_reg = datahi; | ||
203 | + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); | ||
204 | + } | ||
205 | +} | ||
206 | + | ||
207 | static void tcg_out_nop_fill(tcg_insn_unit *p, int count) | ||
208 | { | ||
209 | int i; | ||
210 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
211 | args[4], TCG_TYPE_I64); | ||
212 | } | ||
213 | break; | ||
214 | + case INDEX_op_qemu_ld_a32_i128: | ||
215 | + case INDEX_op_qemu_ld_a64_i128: | ||
216 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
217 | + tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true); | ||
218 | + break; | ||
219 | |||
220 | case INDEX_op_qemu_st_a64_i32: | ||
221 | if (TCG_TARGET_REG_BITS == 32) { | ||
222 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
223 | args[4], TCG_TYPE_I64); | ||
224 | } | ||
225 | break; | ||
226 | + case INDEX_op_qemu_st_a32_i128: | ||
227 | + case INDEX_op_qemu_st_a64_i128: | ||
228 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
229 | + tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); | ||
230 | + break; | ||
231 | |||
232 | case INDEX_op_setcond_i32: | ||
233 | tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], | ||
234 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | ||
235 | case INDEX_op_qemu_st_a64_i64: | ||
236 | return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r); | ||
237 | |||
238 | + case INDEX_op_qemu_ld_a32_i128: | ||
239 | + case INDEX_op_qemu_ld_a64_i128: | ||
240 | + return C_O2_I1(o, m, r); | ||
241 | + case INDEX_op_qemu_st_a32_i128: | ||
242 | + case INDEX_op_qemu_st_a64_i128: | ||
243 | + return C_O0_I3(o, m, r); | ||
244 | + | ||
133 | case INDEX_op_add_vec: | 245 | case INDEX_op_add_vec: |
134 | case INDEX_op_sub_vec: | 246 | case INDEX_op_sub_vec: |
135 | case INDEX_op_smax_vec: | 247 | case INDEX_op_mul_vec: |
136 | case INDEX_op_smin_vec: | ||
137 | case INDEX_op_umax_vec: | ||
138 | case INDEX_op_umin_vec: | ||
139 | + case INDEX_op_shlv_vec: | ||
140 | + case INDEX_op_shrv_vec: | ||
141 | + case INDEX_op_sarv_vec: | ||
142 | + return vece <= MO_32 || have_isa_2_07; | ||
143 | case INDEX_op_ssadd_vec: | ||
144 | case INDEX_op_sssub_vec: | ||
145 | case INDEX_op_usadd_vec: | ||
146 | case INDEX_op_ussub_vec: | ||
147 | - case INDEX_op_shlv_vec: | ||
148 | - case INDEX_op_shrv_vec: | ||
149 | - case INDEX_op_sarv_vec: | ||
150 | return vece <= MO_32; | ||
151 | case INDEX_op_cmp_vec: | ||
152 | - case INDEX_op_mul_vec: | ||
153 | case INDEX_op_shli_vec: | ||
154 | case INDEX_op_shri_vec: | ||
155 | case INDEX_op_sari_vec: | ||
156 | - return vece <= MO_32 ? -1 : 0; | ||
157 | + return vece <= MO_32 || have_isa_2_07 ? -1 : 0; | ||
158 | + case INDEX_op_mul_vec: | ||
159 | + switch (vece) { | ||
160 | + case MO_8: | ||
161 | + case MO_16: | ||
162 | + return -1; | ||
163 | + case MO_32: | ||
164 | + return have_isa_2_07 ? 1 : -1; | ||
165 | + } | ||
166 | + return 0; | ||
167 | case INDEX_op_bitsel_vec: | ||
168 | return have_vsx; | ||
169 | default: | ||
170 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
171 | const TCGArg *args, const int *const_args) | ||
172 | { | ||
173 | static const uint32_t | ||
174 | - add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 }, | ||
175 | - sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 }, | ||
176 | - eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, | ||
177 | - gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, | ||
178 | - gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }, | ||
179 | + add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, | ||
180 | + sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM }, | ||
181 | + eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD }, | ||
182 | + gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD }, | ||
183 | + gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD }, | ||
184 | ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, | ||
185 | usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 }, | ||
186 | sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 }, | ||
187 | ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 }, | ||
188 | - umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 }, | ||
189 | - smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 }, | ||
190 | - umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 }, | ||
191 | - smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }, | ||
192 | - shlv_op[4] = { VSLB, VSLH, VSLW, 0 }, | ||
193 | - shrv_op[4] = { VSRB, VSRH, VSRW, 0 }, | ||
194 | - sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 }, | ||
195 | + umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD }, | ||
196 | + smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD }, | ||
197 | + umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD }, | ||
198 | + smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD }, | ||
199 | + shlv_op[4] = { VSLB, VSLH, VSLW, VSLD }, | ||
200 | + shrv_op[4] = { VSRB, VSRH, VSRW, VSRD }, | ||
201 | + sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD }, | ||
202 | mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 }, | ||
203 | mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 }, | ||
204 | - muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 }, | ||
205 | - mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 }, | ||
206 | + muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 }, | ||
207 | + mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 }, | ||
208 | pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 }, | ||
209 | - rotl_op[4] = { VRLB, VRLH, VRLW, 0 }; | ||
210 | + rotl_op[4] = { VRLB, VRLH, VRLW, VRLD }; | ||
211 | |||
212 | TCGType type = vecl + TCG_TYPE_V64; | ||
213 | TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | ||
214 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
215 | case INDEX_op_sub_vec: | ||
216 | insn = sub_op[vece]; | ||
217 | break; | ||
218 | + case INDEX_op_mul_vec: | ||
219 | + tcg_debug_assert(vece == MO_32 && have_isa_2_07); | ||
220 | + insn = VMULUWM; | ||
221 | + break; | ||
222 | case INDEX_op_ssadd_vec: | ||
223 | insn = ssadd_op[vece]; | ||
224 | break; | ||
225 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
226 | insn = VNOR; | ||
227 | a2 = a1; | ||
228 | break; | ||
229 | + case INDEX_op_orc_vec: | ||
230 | + insn = VORC; | ||
231 | + break; | ||
232 | |||
233 | case INDEX_op_cmp_vec: | ||
234 | switch (args[3]) { | ||
235 | @@ -XXX,XX +XXX,XX @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | ||
236 | { | ||
237 | bool need_swap = false, need_inv = false; | ||
238 | |||
239 | - tcg_debug_assert(vece <= MO_32); | ||
240 | + tcg_debug_assert(vece <= MO_32 || have_isa_2_07); | ||
241 | |||
242 | switch (cond) { | ||
243 | case TCG_COND_EQ: | ||
244 | @@ -XXX,XX +XXX,XX @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0, | ||
245 | break; | ||
246 | |||
247 | case MO_32: | ||
248 | + tcg_debug_assert(!have_isa_2_07); | ||
249 | t3 = tcg_temp_new_vec(type); | ||
250 | t4 = tcg_temp_new_vec(type); | ||
251 | tcg_gen_dupi_vec(MO_8, t4, -16); | ||
252 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
253 | if (hwcap & PPC_FEATURE_ARCH_2_06) { | ||
254 | have_isa = tcg_isa_2_06; | ||
255 | } | ||
256 | +#ifdef PPC_FEATURE2_ARCH_2_07 | ||
257 | + if (hwcap2 & PPC_FEATURE2_ARCH_2_07) { | ||
258 | + have_isa = tcg_isa_2_07; | ||
259 | + } | ||
260 | +#endif | ||
261 | #ifdef PPC_FEATURE2_ARCH_3_00 | ||
262 | if (hwcap2 & PPC_FEATURE2_ARCH_3_00) { | ||
263 | have_isa = tcg_isa_3_00; | ||
264 | -- | 248 | -- |
265 | 2.17.1 | 249 | 2.34.1 |
266 | |||
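A small, self-contained illustration (not from the patch above): the new 'o' constraint masks ALL_GENERAL_REGS with 0xAAAAAAAA, i.e. it keeps only odd-numbered GPRs, because LQ/STQ operate on an even/odd register pair and the backend places the low data register in the odd half (see the datalo & 1 and datahi == datalo - 1 asserts). Printing the registers selected by that mask:

    #include <stdio.h>

    int main(void)
    {
        unsigned mask = 0xAAAAAAAAu;    /* bit n set => register rN allowed */

        for (int r = 0; r < 32; r++) {
            if (mask & (1u << r)) {
                printf("r%d ", r);      /* prints r1 r3 r5 ... r31 */
            }
        }
        printf("\n");
        return 0;
    }
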
1 | From: Alex Bennée <alex.bennee@linaro.org> | 1 | Use LPQ/STPQ when 16-byte atomicity is required. |
---|---|---|---|
2 | Note that these instructions require 16-byte alignment. | ||
2 | 3 | ||
3 | qemu_cpu_kick is used for a number of reasons including to indicate | 4 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
4 | there is work to be done. However when thread=single the old | ||
5 | qemu_cpu_kick_rr_cpu only advanced the vCPU to the next executing one | ||
6 | which can lead to a hang in the case that: | ||
7 | |||
8 | a) the kick is from outside the vCPUs (e.g. iothread) | ||
9 | b) the timers are paused (i.e. iothread calling run_on_cpu) | ||
10 | |||
11 | To avoid this lets split qemu_cpu_kick_rr into two functions. One for | ||
12 | the timer which continues to advance to the next timeslice and another | ||
13 | for all other kicks. | ||
14 | |||
15 | Message-Id: <20191001160426.26644-1-alex.bennee@linaro.org> | ||
16 | Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> | ||
17 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
18 | Signed-off-by: Alex Bennée <alex.bennee@linaro.org> | ||
19 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
20 | --- | 6 | --- |
21 | cpus.c | 24 ++++++++++++++++++------ | 7 | tcg/s390x/tcg-target-con-set.h | 2 + |
22 | 1 file changed, 18 insertions(+), 6 deletions(-) | 8 | tcg/s390x/tcg-target.h | 2 +- |
9 | tcg/s390x/tcg-target.c.inc | 107 ++++++++++++++++++++++++++++++++- | ||
10 | 3 files changed, 107 insertions(+), 4 deletions(-) | ||
23 | 11 | ||
24 | diff --git a/cpus.c b/cpus.c | 12 | diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h |
25 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/cpus.c | 14 | --- a/tcg/s390x/tcg-target-con-set.h |
27 | +++ b/cpus.c | 15 | +++ b/tcg/s390x/tcg-target-con-set.h |
28 | @@ -XXX,XX +XXX,XX @@ static inline int64_t qemu_tcg_next_kick(void) | 16 | @@ -XXX,XX +XXX,XX @@ C_O0_I2(r, r) |
29 | return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD; | 17 | C_O0_I2(r, ri) |
18 | C_O0_I2(r, rA) | ||
19 | C_O0_I2(v, r) | ||
20 | +C_O0_I3(o, m, r) | ||
21 | C_O1_I1(r, r) | ||
22 | C_O1_I1(v, r) | ||
23 | C_O1_I1(v, v) | ||
24 | @@ -XXX,XX +XXX,XX @@ C_O1_I2(v, v, v) | ||
25 | C_O1_I3(v, v, v, v) | ||
26 | C_O1_I4(r, r, ri, rI, r) | ||
27 | C_O1_I4(r, r, rA, rI, r) | ||
28 | +C_O2_I1(o, m, r) | ||
29 | C_O2_I2(o, m, 0, r) | ||
30 | C_O2_I2(o, m, r, r) | ||
31 | C_O2_I3(o, m, 0, 1, r) | ||
32 | diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/tcg/s390x/tcg-target.h | ||
35 | +++ b/tcg/s390x/tcg-target.h | ||
36 | @@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3]; | ||
37 | #define TCG_TARGET_HAS_muluh_i64 0 | ||
38 | #define TCG_TARGET_HAS_mulsh_i64 0 | ||
39 | |||
40 | -#define TCG_TARGET_HAS_qemu_ldst_i128 0 | ||
41 | +#define TCG_TARGET_HAS_qemu_ldst_i128 1 | ||
42 | |||
43 | #define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR) | ||
44 | #define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR) | ||
45 | diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/tcg/s390x/tcg-target.c.inc | ||
48 | +++ b/tcg/s390x/tcg-target.c.inc | ||
49 | @@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode { | ||
50 | RXY_LLGF = 0xe316, | ||
51 | RXY_LLGH = 0xe391, | ||
52 | RXY_LMG = 0xeb04, | ||
53 | + RXY_LPQ = 0xe38f, | ||
54 | RXY_LRV = 0xe31e, | ||
55 | RXY_LRVG = 0xe30f, | ||
56 | RXY_LRVH = 0xe31f, | ||
57 | @@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode { | ||
58 | RXY_STG = 0xe324, | ||
59 | RXY_STHY = 0xe370, | ||
60 | RXY_STMG = 0xeb24, | ||
61 | + RXY_STPQ = 0xe38e, | ||
62 | RXY_STRV = 0xe33e, | ||
63 | RXY_STRVG = 0xe32f, | ||
64 | RXY_STRVH = 0xe33f, | ||
65 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
66 | |||
67 | bool tcg_target_has_memory_bswap(MemOp memop) | ||
68 | { | ||
69 | - return true; | ||
70 | + TCGAtomAlign aa; | ||
71 | + | ||
72 | + if ((memop & MO_SIZE) <= MO_64) { | ||
73 | + return true; | ||
74 | + } | ||
75 | + | ||
76 | + /* | ||
77 | + * Reject 16-byte memop with 16-byte atomicity, | ||
78 | + * but do allow a pair of 64-bit operations. | ||
79 | + */ | ||
80 | + aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true); | ||
81 | + return aa.atom <= MO_64; | ||
30 | } | 82 | } |
31 | 83 | ||
32 | -/* Kick the currently round-robin scheduled vCPU */ | 84 | static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data, |
33 | -static void qemu_cpu_kick_rr_cpu(void) | 85 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, |
34 | +/* Kick the currently round-robin scheduled vCPU to next */ | ||
35 | +static void qemu_cpu_kick_rr_next_cpu(void) | ||
36 | { | 86 | { |
37 | CPUState *cpu; | 87 | TCGLabelQemuLdst *ldst = NULL; |
38 | do { | 88 | MemOp opc = get_memop(oi); |
39 | @@ -XXX,XX +XXX,XX @@ static void qemu_cpu_kick_rr_cpu(void) | 89 | + MemOp s_bits = opc & MO_SIZE; |
40 | } while (cpu != atomic_mb_read(&tcg_current_rr_cpu)); | 90 | unsigned a_mask; |
91 | |||
92 | - h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false); | ||
93 | + h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128); | ||
94 | a_mask = (1 << h->aa.align) - 1; | ||
95 | |||
96 | #ifdef CONFIG_SOFTMMU | ||
97 | - unsigned s_bits = opc & MO_SIZE; | ||
98 | unsigned s_mask = (1 << s_bits) - 1; | ||
99 | int mem_index = get_mmuidx(oi); | ||
100 | int fast_off = TLB_MASK_TABLE_OFS(mem_index); | ||
101 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, | ||
102 | } | ||
41 | } | 103 | } |
42 | 104 | ||
43 | +/* Kick all RR vCPUs */ | 105 | +static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, |
44 | +static void qemu_cpu_kick_rr_cpus(void) | 106 | + TCGReg addr_reg, MemOpIdx oi, bool is_ld) |
45 | +{ | 107 | +{ |
46 | + CPUState *cpu; | 108 | + TCGLabel *l1 = NULL, *l2 = NULL; |
47 | + | 109 | + TCGLabelQemuLdst *ldst; |
48 | + CPU_FOREACH(cpu) { | 110 | + HostAddress h; |
49 | + cpu_exit(cpu); | 111 | + bool need_bswap; |
50 | + }; | 112 | + bool use_pair; |
113 | + S390Opcode insn; | ||
114 | + | ||
115 | + ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld); | ||
116 | + | ||
117 | + use_pair = h.aa.atom < MO_128; | ||
118 | + need_bswap = get_memop(oi) & MO_BSWAP; | ||
119 | + | ||
120 | + if (!use_pair) { | ||
121 | + /* | ||
122 | + * Atomicity requires we use LPQ. If we've already checked for | ||
123 | + * 16-byte alignment, that's all we need. If we arrive with | ||
124 | + * lesser alignment, we have determined that less than 16-byte | ||
125 | + * alignment can be satisfied with two 8-byte loads. | ||
126 | + */ | ||
127 | + if (h.aa.align < MO_128) { | ||
128 | + use_pair = true; | ||
129 | + l1 = gen_new_label(); | ||
130 | + l2 = gen_new_label(); | ||
131 | + | ||
132 | + tcg_out_insn(s, RI, TMLL, addr_reg, 15); | ||
133 | + tgen_branch(s, 7, l1); /* CC in {1,2,3} */ | ||
134 | + } | ||
135 | + | ||
136 | + tcg_debug_assert(!need_bswap); | ||
137 | + tcg_debug_assert(datalo & 1); | ||
138 | + tcg_debug_assert(datahi == datalo - 1); | ||
139 | + insn = is_ld ? RXY_LPQ : RXY_STPQ; | ||
140 | + tcg_out_insn_RXY(s, insn, datahi, h.base, h.index, h.disp); | ||
141 | + | ||
142 | + if (use_pair) { | ||
143 | + tgen_branch(s, S390_CC_ALWAYS, l2); | ||
144 | + tcg_out_label(s, l1); | ||
145 | + } | ||
146 | + } | ||
147 | + if (use_pair) { | ||
148 | + TCGReg d1, d2; | ||
149 | + | ||
150 | + if (need_bswap) { | ||
151 | + d1 = datalo, d2 = datahi; | ||
152 | + insn = is_ld ? RXY_LRVG : RXY_STRVG; | ||
153 | + } else { | ||
154 | + d1 = datahi, d2 = datalo; | ||
155 | + insn = is_ld ? RXY_LG : RXY_STG; | ||
156 | + } | ||
157 | + | ||
158 | + if (h.base == d1 || h.index == d1) { | ||
159 | + tcg_out_insn(s, RXY, LAY, TCG_TMP0, h.base, h.index, h.disp); | ||
160 | + h.base = TCG_TMP0; | ||
161 | + h.index = TCG_REG_NONE; | ||
162 | + h.disp = 0; | ||
163 | + } | ||
164 | + tcg_out_insn_RXY(s, insn, d1, h.base, h.index, h.disp); | ||
165 | + tcg_out_insn_RXY(s, insn, d2, h.base, h.index, h.disp + 8); | ||
166 | + } | ||
167 | + if (l2) { | ||
168 | + tcg_out_label(s, l2); | ||
169 | + } | ||
170 | + | ||
171 | + if (ldst) { | ||
172 | + ldst->type = TCG_TYPE_I128; | ||
173 | + ldst->datalo_reg = datalo; | ||
174 | + ldst->datahi_reg = datahi; | ||
175 | + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); | ||
176 | + } | ||
51 | +} | 177 | +} |
52 | + | 178 | + |
53 | static void do_nothing(CPUState *cpu, run_on_cpu_data unused) | 179 | static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) |
54 | { | 180 | { |
55 | } | 181 | /* Reuse the zeroing that exists for goto_ptr. */ |
56 | @@ -XXX,XX +XXX,XX @@ void qemu_timer_notify_cb(void *opaque, QEMUClockType type) | 182 | @@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, |
57 | static void kick_tcg_thread(void *opaque) | 183 | case INDEX_op_qemu_st_a64_i64: |
58 | { | 184 | tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64); |
59 | timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick()); | 185 | break; |
60 | - qemu_cpu_kick_rr_cpu(); | 186 | + case INDEX_op_qemu_ld_a32_i128: |
61 | + qemu_cpu_kick_rr_next_cpu(); | 187 | + case INDEX_op_qemu_ld_a64_i128: |
62 | } | 188 | + tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true); |
63 | 189 | + break; | |
64 | static void start_tcg_kick_timer(void) | 190 | + case INDEX_op_qemu_st_a32_i128: |
65 | @@ -XXX,XX +XXX,XX @@ void qemu_cpu_kick(CPUState *cpu) | 191 | + case INDEX_op_qemu_st_a64_i128: |
66 | { | 192 | + tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); |
67 | qemu_cond_broadcast(cpu->halt_cond); | 193 | + break; |
68 | if (tcg_enabled()) { | 194 | |
69 | - cpu_exit(cpu); | 195 | case INDEX_op_ld16s_i64: |
70 | - /* NOP unless doing single-thread RR */ | 196 | tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]); |
71 | - qemu_cpu_kick_rr_cpu(); | 197 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) |
72 | + if (qemu_tcg_mttcg_enabled()) { | 198 | case INDEX_op_qemu_st_a32_i32: |
73 | + cpu_exit(cpu); | 199 | case INDEX_op_qemu_st_a64_i32: |
74 | + } else { | 200 | return C_O0_I2(r, r); |
75 | + qemu_cpu_kick_rr_cpus(); | 201 | + case INDEX_op_qemu_ld_a32_i128: |
76 | + } | 202 | + case INDEX_op_qemu_ld_a64_i128: |
77 | } else { | 203 | + return C_O2_I1(o, m, r); |
78 | if (hax_enabled()) { | 204 | + case INDEX_op_qemu_st_a32_i128: |
79 | /* | 205 | + case INDEX_op_qemu_st_a64_i128: |
206 | + return C_O0_I3(o, m, r); | ||
207 | |||
208 | case INDEX_op_deposit_i32: | ||
209 | case INDEX_op_deposit_i64: | ||
80 | -- | 210 | -- |
81 | 2.17.1 | 211 | 2.34.1 |
82 | |||
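For reference only (a hedged sketch, not QEMU code): when full 16-byte atomicity is demanded but only lesser alignment was proven at translate time, the s390x code above tests the low four address bits at run time (TMLL plus a branch) and falls back to two 8-byte accesses. In plain C, assuming a little-endian layout and 16-byte atomic support from the compiler/libatomic, the store side behaves roughly like:

    #include <stdint.h>

    static inline void model_st16(void *p, uint64_t lo, uint64_t hi)
    {
        if (((uintptr_t)p & 15) == 0) {
            /* aligned: one 16-byte atomic store */
            unsigned __int128 v = ((unsigned __int128)hi << 64) | lo;
            __atomic_store_n((unsigned __int128 *)p, v, __ATOMIC_RELAXED);
        } else {
            /* unaligned: a pair of 8-byte atomic stores */
            uint64_t *q = p;
            __atomic_store_n(&q[0], lo, __ATOMIC_RELAXED);
            __atomic_store_n(&q[1], hi, __ATOMIC_RELAXED);
        }
    }
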
1 | These new instructions are conditional on MSR.FP when TX=0 and | 1 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
---|---|---|---|
2 | MSR.VEC when TX=1. Since we only care about the Altivec registers, | ||
3 | and force TX=1, we can consider these to be Altivec instructions. | ||
4 | Since Altivec is true for any use of vector types, we only need | ||
5 | test have_isa_2_07. | ||
6 | |||
7 | This includes moves to and from the integer registers. | ||
8 | |||
9 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
11 | --- | 3 | --- |
12 | tcg/ppc/tcg-target.inc.c | 32 ++++++++++++++++++++++++++------ | 4 | .../generic/host/load-extract-al16-al8.h | 45 +++++++++++++++++++ |
13 | 1 file changed, 26 insertions(+), 6 deletions(-) | 5 | accel/tcg/ldst_atomicity.c.inc | 36 +-------------- |
6 | 2 files changed, 47 insertions(+), 34 deletions(-) | ||
7 | create mode 100644 host/include/generic/host/load-extract-al16-al8.h | ||
14 | 8 | ||
15 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 9 | diff --git a/host/include/generic/host/load-extract-al16-al8.h b/host/include/generic/host/load-extract-al16-al8.h |
10 | new file mode 100644 | ||
11 | index XXXXXXX..XXXXXXX | ||
12 | --- /dev/null | ||
13 | +++ b/host/include/generic/host/load-extract-al16-al8.h | ||
14 | @@ -XXX,XX +XXX,XX @@ | ||
15 | +/* | ||
16 | + * SPDX-License-Identifier: GPL-2.0-or-later | ||
17 | + * Atomic extract 64 from 128-bit, generic version. | ||
18 | + * | ||
19 | + * Copyright (C) 2023 Linaro, Ltd. | ||
20 | + */ | ||
21 | + | ||
22 | +#ifndef HOST_LOAD_EXTRACT_AL16_AL8_H | ||
23 | +#define HOST_LOAD_EXTRACT_AL16_AL8_H | ||
24 | + | ||
25 | +/** | ||
26 | + * load_atom_extract_al16_or_al8: | ||
27 | + * @pv: host address | ||
28 | + * @s: object size in bytes, @s <= 8. | ||
29 | + * | ||
30 | + * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not | ||
31 | + * cross an 16-byte boundary then the access must be 16-byte atomic, | ||
32 | + * otherwise the access must be 8-byte atomic. | ||
33 | + */ | ||
34 | +static inline uint64_t ATTRIBUTE_ATOMIC128_OPT | ||
35 | +load_atom_extract_al16_or_al8(void *pv, int s) | ||
36 | +{ | ||
37 | + uintptr_t pi = (uintptr_t)pv; | ||
38 | + int o = pi & 7; | ||
39 | + int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8; | ||
40 | + Int128 r; | ||
41 | + | ||
42 | + pv = (void *)(pi & ~7); | ||
43 | + if (pi & 8) { | ||
44 | + uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8); | ||
45 | + uint64_t a = qatomic_read__nocheck(p8); | ||
46 | + uint64_t b = qatomic_read__nocheck(p8 + 1); | ||
47 | + | ||
48 | + if (HOST_BIG_ENDIAN) { | ||
49 | + r = int128_make128(b, a); | ||
50 | + } else { | ||
51 | + r = int128_make128(a, b); | ||
52 | + } | ||
53 | + } else { | ||
54 | + r = atomic16_read_ro(pv); | ||
55 | + } | ||
56 | + return int128_getlo(int128_urshift(r, shr)); | ||
57 | +} | ||
58 | + | ||
59 | +#endif /* HOST_LOAD_EXTRACT_AL16_AL8_H */ | ||
60 | diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc | ||
16 | index XXXXXXX..XXXXXXX 100644 | 61 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/tcg/ppc/tcg-target.inc.c | 62 | --- a/accel/tcg/ldst_atomicity.c.inc |
18 | +++ b/tcg/ppc/tcg-target.inc.c | 63 | +++ b/accel/tcg/ldst_atomicity.c.inc |
19 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 64 | @@ -XXX,XX +XXX,XX @@ |
20 | #define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */ | 65 | * See the COPYING file in the top-level directory. |
21 | #define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */ | 66 | */ |
22 | 67 | ||
23 | +#define MFVSRD (XO31(51) | 1) /* v2.07, force sx=1 */ | 68 | +#include "host/load-extract-al16-al8.h" |
24 | +#define MFVSRWZ (XO31(115) | 1) /* v2.07, force sx=1 */ | ||
25 | +#define MTVSRD (XO31(179) | 1) /* v2.07, force tx=1 */ | ||
26 | +#define MTVSRWZ (XO31(243) | 1) /* v2.07, force tx=1 */ | ||
27 | + | 69 | + |
28 | #define RT(r) ((r)<<21) | 70 | #ifdef CONFIG_ATOMIC64 |
29 | #define RS(r) ((r)<<21) | 71 | # define HAVE_al8 true |
30 | #define RA(r) ((r)<<16) | 72 | #else |
31 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) | 73 | @@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_extract_al16_or_exit(CPUArchState *env, uintptr_t ra, |
32 | tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | 74 | return int128_getlo(r); |
33 | /* fallthru */ | 75 | } |
34 | case TCG_TYPE_I32: | 76 | |
35 | - if (ret < TCG_REG_V0 && arg < TCG_REG_V0) { | 77 | -/** |
36 | - tcg_out32(s, OR | SAB(arg, ret, arg)); | 78 | - * load_atom_extract_al16_or_al8: |
37 | - break; | 79 | - * @p: host address |
38 | - } else if (ret < TCG_REG_V0 || arg < TCG_REG_V0) { | 80 | - * @s: object size in bytes, @s <= 8. |
39 | - /* Altivec does not support vector/integer moves. */ | 81 | - * |
40 | - return false; | 82 | - * Load @s bytes from @p, when p % s != 0. If [p, p+s-1] does not |
41 | + if (ret < TCG_REG_V0) { | 83 | - * cross an 16-byte boundary then the access must be 16-byte atomic, |
42 | + if (arg < TCG_REG_V0) { | 84 | - * otherwise the access must be 8-byte atomic. |
43 | + tcg_out32(s, OR | SAB(arg, ret, arg)); | 85 | - */ |
44 | + break; | 86 | -static inline uint64_t ATTRIBUTE_ATOMIC128_OPT |
45 | + } else if (have_isa_2_07) { | 87 | -load_atom_extract_al16_or_al8(void *pv, int s) |
46 | + tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD) | 88 | -{ |
47 | + | VRT(arg) | RA(ret)); | 89 | - uintptr_t pi = (uintptr_t)pv; |
48 | + break; | 90 | - int o = pi & 7; |
49 | + } else { | 91 | - int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8; |
50 | + /* Altivec does not support vector->integer moves. */ | 92 | - Int128 r; |
51 | + return false; | 93 | - |
52 | + } | 94 | - pv = (void *)(pi & ~7); |
53 | + } else if (arg < TCG_REG_V0) { | 95 | - if (pi & 8) { |
54 | + if (have_isa_2_07) { | 96 | - uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8); |
55 | + tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD) | 97 | - uint64_t a = qatomic_read__nocheck(p8); |
56 | + | VRT(ret) | RA(arg)); | 98 | - uint64_t b = qatomic_read__nocheck(p8 + 1); |
57 | + break; | 99 | - |
58 | + } else { | 100 | - if (HOST_BIG_ENDIAN) { |
59 | + /* Altivec does not support integer->vector moves. */ | 101 | - r = int128_make128(b, a); |
60 | + return false; | 102 | - } else { |
61 | + } | 103 | - r = int128_make128(a, b); |
62 | } | 104 | - } |
63 | /* fallthru */ | 105 | - } else { |
64 | case TCG_TYPE_V64: | 106 | - r = atomic16_read_ro(pv); |
107 | - } | ||
108 | - return int128_getlo(int128_urshift(r, shr)); | ||
109 | -} | ||
110 | - | ||
111 | /** | ||
112 | * load_atom_4_by_2: | ||
113 | * @pv: host address | ||
65 | -- | 114 | -- |
66 | 2.17.1 | 115 | 2.34.1 |
67 | |||
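A hedged usage sketch (hypothetical caller, not part of the patch above): ldst_atomicity.c.inc now simply includes the host header and calls the helper, for example to pull a misaligned 4-byte value out of host memory while keeping the wide load atomic:

    #include "host/load-extract-al16-al8.h"

    /* haddr is not 4-aligned; the helper performs one 16- or 8-byte
     * atomic load and shifts the wanted bytes down to bits [31:0]. */
    static uint32_t example_load_u32_misaligned(void *haddr)
    {
        return (uint32_t)load_atom_extract_al16_or_al8(haddr, 4);
    }
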
1 | Introduce all of the flags required to enable tcg backend vector support, | 1 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
---|---|---|---|
2 | and a runtime flag to indicate the host supports Altivec instructions. | 2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
3 | --- | ||
4 | host/include/generic/host/store-insert-al16.h | 50 +++++++++++++++++++ | ||
5 | accel/tcg/ldst_atomicity.c.inc | 40 +-------------- | ||
6 | 2 files changed, 51 insertions(+), 39 deletions(-) | ||
7 | create mode 100644 host/include/generic/host/store-insert-al16.h | ||
3 | 8 | ||
4 | For now, do not actually set have_isa_altivec to true, because we have not | 9 | diff --git a/host/include/generic/host/store-insert-al16.h b/host/include/generic/host/store-insert-al16.h |
5 | yet added all of the code to actually generate all of the required insns. | ||
6 | However, we must define these flags in order to disable ifndefs that create | ||
7 | stub versions of the functions added here. | ||
8 | |||
9 | The change to tcg_out_movi works around a buglet in tcg.c wherein if we | ||
10 | do not define tcg_out_dupi_vec we get a declared but not defined Werror, | ||
11 | but if we only declare it we get a defined but not used Werror. We need | ||
12 | to this change to tcg_out_movi eventually anyway, so it's no biggie. | ||
13 | |||
14 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
16 | --- | ||
17 | tcg/ppc/tcg-target.h | 25 ++++++++++++++++ | ||
18 | tcg/ppc/tcg-target.opc.h | 5 ++++ | ||
19 | tcg/ppc/tcg-target.inc.c | 62 ++++++++++++++++++++++++++++++++++++++-- | ||
20 | 3 files changed, 89 insertions(+), 3 deletions(-) | ||
21 | create mode 100644 tcg/ppc/tcg-target.opc.h | ||
22 | |||
23 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/tcg/ppc/tcg-target.h | ||
26 | +++ b/tcg/ppc/tcg-target.h | ||
27 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
28 | } TCGPowerISA; | ||
29 | |||
30 | extern TCGPowerISA have_isa; | ||
31 | +extern bool have_altivec; | ||
32 | |||
33 | #define have_isa_2_06 (have_isa >= tcg_isa_2_06) | ||
34 | #define have_isa_3_00 (have_isa >= tcg_isa_3_00) | ||
35 | @@ -XXX,XX +XXX,XX @@ extern TCGPowerISA have_isa; | ||
36 | #define TCG_TARGET_HAS_mulsh_i64 1 | ||
37 | #endif | ||
38 | |||
39 | +/* | ||
40 | + * While technically Altivec could support V64, it has no 64-bit store | ||
41 | + * instruction and substituting two 32-bit stores makes the generated | ||
42 | + * code quite large. | ||
43 | + */ | ||
44 | +#define TCG_TARGET_HAS_v64 0 | ||
45 | +#define TCG_TARGET_HAS_v128 have_altivec | ||
46 | +#define TCG_TARGET_HAS_v256 0 | ||
47 | + | ||
48 | +#define TCG_TARGET_HAS_andc_vec 0 | ||
49 | +#define TCG_TARGET_HAS_orc_vec 0 | ||
50 | +#define TCG_TARGET_HAS_not_vec 0 | ||
51 | +#define TCG_TARGET_HAS_neg_vec 0 | ||
52 | +#define TCG_TARGET_HAS_abs_vec 0 | ||
53 | +#define TCG_TARGET_HAS_shi_vec 0 | ||
54 | +#define TCG_TARGET_HAS_shs_vec 0 | ||
55 | +#define TCG_TARGET_HAS_shv_vec 0 | ||
56 | +#define TCG_TARGET_HAS_cmp_vec 0 | ||
57 | +#define TCG_TARGET_HAS_mul_vec 0 | ||
58 | +#define TCG_TARGET_HAS_sat_vec 0 | ||
59 | +#define TCG_TARGET_HAS_minmax_vec 0 | ||
60 | +#define TCG_TARGET_HAS_bitsel_vec 0 | ||
61 | +#define TCG_TARGET_HAS_cmpsel_vec 0 | ||
62 | + | ||
63 | void flush_icache_range(uintptr_t start, uintptr_t stop); | ||
64 | void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t); | ||
65 | |||
66 | diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h | ||
67 | new file mode 100644 | 10 | new file mode 100644 |
68 | index XXXXXXX..XXXXXXX | 11 | index XXXXXXX..XXXXXXX |
69 | --- /dev/null | 12 | --- /dev/null |
70 | +++ b/tcg/ppc/tcg-target.opc.h | 13 | +++ b/host/include/generic/host/store-insert-al16.h |
71 | @@ -XXX,XX +XXX,XX @@ | 14 | @@ -XXX,XX +XXX,XX @@ |
72 | +/* | 15 | +/* |
73 | + * Target-specific opcodes for host vector expansion. These will be | 16 | + * SPDX-License-Identifier: GPL-2.0-or-later |
74 | + * emitted by tcg_expand_vec_op. For those familiar with GCC internals, | 17 | + * Atomic store insert into 128-bit, generic version. |
75 | + * consider these to be UNSPEC with names. | 18 | + * |
19 | + * Copyright (C) 2023 Linaro, Ltd. | ||
76 | + */ | 20 | + */ |
77 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 21 | + |
78 | index XXXXXXX..XXXXXXX 100644 | 22 | +#ifndef HOST_STORE_INSERT_AL16_H |
79 | --- a/tcg/ppc/tcg-target.inc.c | 23 | +#define HOST_STORE_INSERT_AL16_H |
80 | +++ b/tcg/ppc/tcg-target.inc.c | 24 | + |
81 | @@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr; | 25 | +/** |
82 | 26 | + * store_atom_insert_al16: | |
83 | TCGPowerISA have_isa; | 27 | + * @p: host address |
84 | static bool have_isel; | 28 | + * @val: shifted value to store |
85 | +bool have_altivec; | 29 | + * @msk: mask for value to store |
86 | 30 | + * | |
87 | #ifndef CONFIG_SOFTMMU | 31 | + * Atomically store @val to @p masked by @msk. |
88 | #define TCG_GUEST_BASE_REG 30 | 32 | + */ |
89 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, | 33 | +static inline void ATTRIBUTE_ATOMIC128_OPT |
90 | } | 34 | +store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk) |
91 | } | 35 | +{ |
92 | 36 | +#if defined(CONFIG_ATOMIC128) | |
93 | -static inline void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, | 37 | + __uint128_t *pu; |
94 | - tcg_target_long arg) | 38 | + Int128Alias old, new; |
95 | +static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, | 39 | + |
96 | + tcg_target_long val) | 40 | + /* With CONFIG_ATOMIC128, we can avoid the memory barriers. */ |
97 | { | 41 | + pu = __builtin_assume_aligned(ps, 16); |
98 | - tcg_out_movi_int(s, type, ret, arg, false); | 42 | + old.u = *pu; |
99 | + g_assert_not_reached(); | 43 | + msk = int128_not(msk); |
44 | + do { | ||
45 | + new.s = int128_and(old.s, msk); | ||
46 | + new.s = int128_or(new.s, val); | ||
47 | + } while (!__atomic_compare_exchange_n(pu, &old.u, new.u, true, | ||
48 | + __ATOMIC_RELAXED, __ATOMIC_RELAXED)); | ||
49 | +#else | ||
50 | + Int128 old, new, cmp; | ||
51 | + | ||
52 | + ps = __builtin_assume_aligned(ps, 16); | ||
53 | + old = *ps; | ||
54 | + msk = int128_not(msk); | ||
55 | + do { | ||
56 | + cmp = old; | ||
57 | + new = int128_and(old, msk); | ||
58 | + new = int128_or(new, val); | ||
59 | + old = atomic16_cmpxchg(ps, cmp, new); | ||
60 | + } while (int128_ne(cmp, old)); | ||
61 | +#endif | ||
100 | +} | 62 | +} |
101 | + | 63 | + |
102 | +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, | 64 | +#endif /* HOST_STORE_INSERT_AL16_H */ |
103 | + tcg_target_long arg) | 65 | diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc |
104 | +{ | 66 | index XXXXXXX..XXXXXXX 100644 |
105 | + switch (type) { | 67 | --- a/accel/tcg/ldst_atomicity.c.inc |
106 | + case TCG_TYPE_I32: | 68 | +++ b/accel/tcg/ldst_atomicity.c.inc |
107 | + case TCG_TYPE_I64: | 69 | @@ -XXX,XX +XXX,XX @@ |
108 | + tcg_debug_assert(ret < TCG_REG_V0); | 70 | */ |
109 | + tcg_out_movi_int(s, type, ret, arg, false); | 71 | |
110 | + break; | 72 | #include "host/load-extract-al16-al8.h" |
111 | + | 73 | +#include "host/store-insert-al16.h" |
112 | + case TCG_TYPE_V64: | 74 | |
113 | + case TCG_TYPE_V128: | 75 | #ifdef CONFIG_ATOMIC64 |
114 | + tcg_debug_assert(ret >= TCG_REG_V0); | 76 | # define HAVE_al8 true |
115 | + tcg_out_dupi_vec(s, type, ret, arg); | 77 | @@ -XXX,XX +XXX,XX @@ static void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk) |
116 | + break; | 78 | __ATOMIC_RELAXED, __ATOMIC_RELAXED)); |
117 | + | ||
118 | + default: | ||
119 | + g_assert_not_reached(); | ||
120 | + } | ||
121 | } | 79 | } |
122 | 80 | ||
123 | static bool mask_operand(uint32_t c, int *mb, int *me) | 81 | -/** |
124 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, | 82 | - * store_atom_insert_al16: |
125 | } | 83 | - * @p: host address |
126 | } | 84 | - * @val: shifted value to store |
127 | 85 | - * @msk: mask for value to store | |
128 | +int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | 86 | - * |
129 | +{ | 87 | - * Atomically store @val to @p masked by @msk. |
130 | + g_assert_not_reached(); | 88 | - */ |
131 | +} | 89 | -static void ATTRIBUTE_ATOMIC128_OPT |
132 | + | 90 | -store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk) |
133 | +static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, | 91 | -{ |
134 | + TCGReg dst, TCGReg src) | 92 | -#if defined(CONFIG_ATOMIC128) |
135 | +{ | 93 | - __uint128_t *pu, old, new; |
136 | + g_assert_not_reached(); | 94 | - |
137 | +} | 95 | - /* With CONFIG_ATOMIC128, we can avoid the memory barriers. */ |
138 | + | 96 | - pu = __builtin_assume_aligned(ps, 16); |
139 | +static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, | 97 | - old = *pu; |
140 | + TCGReg out, TCGReg base, intptr_t offset) | 98 | - do { |
141 | +{ | 99 | - new = (old & ~msk.u) | val.u; |
142 | + g_assert_not_reached(); | 100 | - } while (!__atomic_compare_exchange_n(pu, &old, new, true, |
143 | +} | 101 | - __ATOMIC_RELAXED, __ATOMIC_RELAXED)); |
144 | + | 102 | -#elif defined(CONFIG_CMPXCHG128) |
145 | +static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | 103 | - __uint128_t *pu, old, new; |
146 | + unsigned vecl, unsigned vece, | 104 | - |
147 | + const TCGArg *args, const int *const_args) | 105 | - /* |
148 | +{ | 106 | - * Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always |
149 | + g_assert_not_reached(); | 107 | - * defer to libatomic, so we must use __sync_*_compare_and_swap_16 |
150 | +} | 108 | - * and accept the sequential consistency that comes with it. |
151 | + | 109 | - */ |
152 | +void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, | 110 | - pu = __builtin_assume_aligned(ps, 16); |
153 | + TCGArg a0, ...) | 111 | - do { |
154 | +{ | 112 | - old = *pu; |
155 | + g_assert_not_reached(); | 113 | - new = (old & ~msk.u) | val.u; |
156 | +} | 114 | - } while (!__sync_bool_compare_and_swap_16(pu, old, new)); |
157 | + | 115 | -#else |
158 | static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | 116 | - qemu_build_not_reached(); |
159 | { | 117 | -#endif |
160 | static const TCGTargetOpDef r = { .args_ct_str = { "r" } }; | 118 | -} |
161 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | 119 | - |
162 | 120 | /** | |
163 | tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; | 121 | * store_bytes_leN: |
164 | tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff; | 122 | * @pv: host address |
165 | + if (have_altivec) { | ||
166 | + tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull; | ||
167 | + tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull; | ||
168 | + } | ||
169 | |||
170 | tcg_target_call_clobber_regs = 0; | ||
171 | tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0); | ||
172 | -- | 123 | -- |
173 | 2.17.1 | 124 | 2.34.1 |
174 | |||
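A hedged usage sketch (hypothetical, not part of the patch above): store_atom_insert_al16() stores @val under @msk, so replacing a single byte of a 16-byte-aligned slot while leaving the other fifteen bytes untouched looks like:

    #include "qemu/int128.h"
    #include "host/store-insert-al16.h"

    static void example_store_one_byte(Int128 *p16, uint8_t byte)
    {
        /* Select byte 5 of the 128-bit value and insert it atomically. */
        Int128 msk = int128_lshift(int128_make64(0xff), 5 * 8);
        Int128 val = int128_lshift(int128_make64(byte), 5 * 8);

        store_atom_insert_al16(p16, val, msk);
    }
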
1 | These new instructions are conditional only on MSR.VSX and | 1 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
---|---|---|---|
2 | are thus part of the VSX instruction set, and not Altivec. | ||
3 | This includes double-word loads and stores. | ||
4 | |||
5 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | --- | 3 | --- |
8 | tcg/ppc/tcg-target.inc.c | 11 +++++++++++ | 4 | .../x86_64/host/load-extract-al16-al8.h | 50 +++++++++++++++++++ |
9 | 1 file changed, 11 insertions(+) | 5 | 1 file changed, 50 insertions(+) |
6 | create mode 100644 host/include/x86_64/host/load-extract-al16-al8.h | ||
10 | 7 | ||
11 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 8 | diff --git a/host/include/x86_64/host/load-extract-al16-al8.h b/host/include/x86_64/host/load-extract-al16-al8.h |
12 | index XXXXXXX..XXXXXXX 100644 | 9 | new file mode 100644 |
13 | --- a/tcg/ppc/tcg-target.inc.c | 10 | index XXXXXXX..XXXXXXX |
14 | +++ b/tcg/ppc/tcg-target.inc.c | 11 | --- /dev/null |
15 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 12 | +++ b/host/include/x86_64/host/load-extract-al16-al8.h |
16 | #define LVEWX XO31(71) | 13 | @@ -XXX,XX +XXX,XX @@ |
17 | #define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */ | 14 | +/* |
18 | #define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */ | 15 | + * SPDX-License-Identifier: GPL-2.0-or-later |
19 | +#define LXSIWZX (XO31(12) | 1) /* v2.07, force tx=1 */ | 16 | + * Atomic extract 64 from 128-bit, x86_64 version. |
20 | 17 | + * | |
21 | #define STVX XO31(231) | 18 | + * Copyright (C) 2023 Linaro, Ltd. |
22 | #define STVEWX XO31(199) | 19 | + */ |
23 | #define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */ | 20 | + |
24 | +#define STXSIWX (XO31(140) | 1) /* v2.07, force sx=1 */ | 21 | +#ifndef X86_64_LOAD_EXTRACT_AL16_AL8_H |
25 | 22 | +#define X86_64_LOAD_EXTRACT_AL16_AL8_H | |
26 | #define VADDSBS VX4(768) | 23 | + |
27 | #define VADDUBS VX4(512) | 24 | +#ifdef CONFIG_INT128_TYPE |
28 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, | 25 | +#include "host/cpuinfo.h" |
29 | tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset); | 26 | + |
30 | break; | 27 | +/** |
31 | } | 28 | + * load_atom_extract_al16_or_al8: |
32 | + if (have_isa_2_07 && have_vsx) { | 29 | + * @pv: host address |
33 | + tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset); | 30 | + * @s: object size in bytes, @s <= 8. |
34 | + break; | 31 | + * |
35 | + } | 32 | + * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not |
36 | tcg_debug_assert((offset & 3) == 0); | 33 | + * cross an 16-byte boundary then the access must be 16-byte atomic, |
37 | tcg_out_mem_long(s, 0, LVEWX, ret, base, offset); | 34 | + * otherwise the access must be 8-byte atomic. |
38 | shift = (offset - 4) & 0xc; | 35 | + */ |
39 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, | 36 | +static inline uint64_t ATTRIBUTE_ATOMIC128_OPT |
40 | tcg_out_mem_long(s, STW, STWX, arg, base, offset); | 37 | +load_atom_extract_al16_or_al8(void *pv, int s) |
41 | break; | 38 | +{ |
42 | } | 39 | + uintptr_t pi = (uintptr_t)pv; |
43 | + if (have_isa_2_07 && have_vsx) { | 40 | + __int128_t *ptr_align = (__int128_t *)(pi & ~7); |
44 | + tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset); | 41 | + int shr = (pi & 7) * 8; |
45 | + break; | 42 | + Int128Alias r; |
46 | + } | 43 | + |
47 | + assert((offset & 3) == 0); | 44 | + /* |
48 | tcg_debug_assert((offset & 3) == 0); | 45 | + * ptr_align % 16 is now only 0 or 8. |
49 | shift = (offset - 4) & 0xc; | 46 | + * If the host supports atomic loads with VMOVDQU, then always use that, |
50 | if (shift) { | 47 | + * making the branch highly predictable. Otherwise we must use VMOVDQA |
48 | + * when ptr_align % 16 == 0 for 16-byte atomicity. | ||
49 | + */ | ||
50 | + if ((cpuinfo & CPUINFO_ATOMIC_VMOVDQU) || (pi & 8)) { | ||
51 | + asm("vmovdqu %1, %0" : "=x" (r.i) : "m" (*ptr_align)); | ||
52 | + } else { | ||
53 | + asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr_align)); | ||
54 | + } | ||
55 | + return int128_getlo(int128_urshift(r.s, shr)); | ||
56 | +} | ||
57 | +#else | ||
58 | +/* Fallback definition that must be optimized away, or error. */ | ||
59 | +uint64_t QEMU_ERROR("unsupported atomic") | ||
60 | + load_atom_extract_al16_or_al8(void *pv, int s); | ||
61 | +#endif | ||
62 | + | ||
63 | +#endif /* X86_64_LOAD_EXTRACT_AL16_AL8_H */ | ||
51 | -- | 64 | -- |
52 | 2.17.1 | 65 | 2.34.1 |
53 | |||
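A non-atomic reference model (assumption: little-endian host; illustration only, not part of the patch above) helps sanity-check the shift arithmetic in the x86_64 variant: the 16 bytes containing the value are read as one 128-bit quantity, shifted right by the byte offset within the 8-byte-aligned window, and truncated to the requested size:

    #include <stdint.h>
    #include <string.h>

    static uint64_t model_extract(const void *pv, int s)
    {
        uintptr_t pi = (uintptr_t)pv;
        const void *win = (const void *)(pi & ~(uintptr_t)7);
        int shr = (int)(pi & 7) * 8;
        unsigned __int128 r;
        uint64_t v;

        memcpy(&r, win, 16);                 /* plain load: reference only */
        v = (uint64_t)(r >> shr);
        return s == 8 ? v : v & (((uint64_t)1 << (s * 8)) - 1);
    }
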
1 | Now that we have implemented the required tcg operations, | 1 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
---|---|---|---|
2 | we can enable detection of host vector support. | ||
3 | |||
4 | Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> (PPC32) | ||
5 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | --- | 3 | --- |
8 | tcg/ppc/tcg-target.inc.c | 4 ++++ | 4 | .../aarch64/host/load-extract-al16-al8.h | 40 +++++++++++++++++++ |
9 | 1 file changed, 4 insertions(+) | 5 | 1 file changed, 40 insertions(+) |
6 | create mode 100644 host/include/aarch64/host/load-extract-al16-al8.h | ||
10 | 7 | ||
11 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 8 | diff --git a/host/include/aarch64/host/load-extract-al16-al8.h b/host/include/aarch64/host/load-extract-al16-al8.h |
12 | index XXXXXXX..XXXXXXX 100644 | 9 | new file mode 100644 |
13 | --- a/tcg/ppc/tcg-target.inc.c | 10 | index XXXXXXX..XXXXXXX |
14 | +++ b/tcg/ppc/tcg-target.inc.c | 11 | --- /dev/null |
15 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | 12 | +++ b/host/include/aarch64/host/load-extract-al16-al8.h |
16 | have_isel = have_isa_2_06; | 13 | @@ -XXX,XX +XXX,XX @@ |
17 | #endif | 14 | +/* |
18 | 15 | + * SPDX-License-Identifier: GPL-2.0-or-later | |
19 | + if (hwcap & PPC_FEATURE_HAS_ALTIVEC) { | 16 | + * Atomic extract 64 from 128-bit, AArch64 version. |
20 | + have_altivec = true; | 17 | + * |
21 | + } | 18 | + * Copyright (C) 2023 Linaro, Ltd. |
19 | + */ | ||
22 | + | 20 | + |
23 | tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; | 21 | +#ifndef AARCH64_LOAD_EXTRACT_AL16_AL8_H |
24 | tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff; | 22 | +#define AARCH64_LOAD_EXTRACT_AL16_AL8_H |
25 | if (have_altivec) { | 23 | + |
24 | +#include "host/cpuinfo.h" | ||
25 | +#include "tcg/debug-assert.h" | ||
26 | + | ||
27 | +/** | ||
28 | + * load_atom_extract_al16_or_al8: | ||
29 | + * @pv: host address | ||
30 | + * @s: object size in bytes, @s <= 8. | ||
31 | + * | ||
32 | + * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not | ||
33 | + * cross a 16-byte boundary then the access must be 16-byte atomic, | ||
34 | + * otherwise the access must be 8-byte atomic. | ||
35 | + */ | ||
36 | +static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s) | ||
37 | +{ | ||
38 | + uintptr_t pi = (uintptr_t)pv; | ||
39 | + __int128_t *ptr_align = (__int128_t *)(pi & ~7); | ||
40 | + int shr = (pi & 7) * 8; | ||
41 | + uint64_t l, h; | ||
42 | + | ||
43 | + /* | ||
44 | + * With FEAT_LSE2, LDP is single-copy atomic if 16-byte aligned | ||
45 | + * and single-copy atomic on the parts if 8-byte aligned. | ||
46 | + * All we need do is align the pointer mod 8. | ||
47 | + */ | ||
48 | + tcg_debug_assert(HAVE_ATOMIC128_RO); | ||
49 | + asm("ldp %0, %1, %2" : "=r"(l), "=r"(h) : "m"(*ptr_align)); | ||
50 | + return (l >> shr) | (h << (-shr & 63)); | ||
51 | +} | ||
52 | + | ||
53 | +#endif /* AARCH64_LOAD_EXTRACT_AL16_AL8_H */ | ||
26 | -- | 54 | -- |
27 | 2.17.1 | 55 | 2.34.1 |
28 | |||
29 | diff view generated by jsdifflib |
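
The x86_64 and aarch64 load_atom_extract_al16_or_al8 helpers above share the same extraction arithmetic; only the way the 16 aligned bytes are loaded atomically differs. As a rough, hypothetical illustration (not QEMU code, and deliberately ignoring the atomicity that is the whole point of the real helpers), the arithmetic alone can be sketched in portable C for a little-endian host:

    #include <stdint.h>

    /*
     * Hypothetical sketch only: recover @s (<= 8) bytes at misaligned @pv
     * from the two aligned 8-byte words around it.  Because the object
     * sizes involved are powers of two and pv % s != 0, pv % 8 is also
     * non-zero, so shr is never 0 and the two halves never overlap.
     * Assumes the whole 16-byte window is readable and a little-endian host.
     */
    static uint64_t extract_al16_or_al8_sketch(const void *pv, int s)
    {
        uintptr_t pi = (uintptr_t)pv;
        const uint64_t *p8 = (const uint64_t *)(pi & ~(uintptr_t)7);
        int shr = (pi & 7) * 8;
        uint64_t lo = p8[0];   /* stands in for the low half of one 16-byte load */
        uint64_t hi = p8[1];   /* stands in for the high half */

        (void)s;               /* the caller keeps only the low s bytes */
        return (lo >> shr) | (hi << (-shr & 63));
    }

The real helpers replace the two separate loads with a single 16-byte load that the host can make atomic (VMOVDQA/VMOVDQU on x86_64, LDP under FEAT_LSE2 on aarch64), which is why they live under host/include/ rather than in generic code.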
1 | For Altivec, this is done via vector shift by vector, | 1 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
---|---|---|---|
2 | and loading the immediate into a register. | 2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
3 | --- | ||
4 | host/include/aarch64/host/store-insert-al16.h | 47 +++++++++++++++++++ | ||
5 | 1 file changed, 47 insertions(+) | ||
6 | create mode 100644 host/include/aarch64/host/store-insert-al16.h | ||
3 | 7 | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 8 | diff --git a/host/include/aarch64/host/store-insert-al16.h b/host/include/aarch64/host/store-insert-al16.h |
5 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | 9 | new file mode 100644 |
6 | --- | 10 | index XXXXXXX..XXXXXXX |
7 | tcg/ppc/tcg-target.h | 2 +- | 11 | --- /dev/null |
8 | tcg/ppc/tcg-target.inc.c | 58 ++++++++++++++++++++++++++++++++++++++-- | 12 | +++ b/host/include/aarch64/host/store-insert-al16.h |
9 | 2 files changed, 57 insertions(+), 3 deletions(-) | 13 | @@ -XXX,XX +XXX,XX @@ |
10 | 14 | +/* | |
11 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | 15 | + * SPDX-License-Identifier: GPL-2.0-or-later |
12 | index XXXXXXX..XXXXXXX 100644 | 16 | + * Atomic store insert into 128-bit, AArch64 version. |
13 | --- a/tcg/ppc/tcg-target.h | 17 | + * |
14 | +++ b/tcg/ppc/tcg-target.h | 18 | + * Copyright (C) 2023 Linaro, Ltd. |
15 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | 19 | + */ |
16 | #define TCG_TARGET_HAS_abs_vec 0 | ||
17 | #define TCG_TARGET_HAS_shi_vec 0 | ||
18 | #define TCG_TARGET_HAS_shs_vec 0 | ||
19 | -#define TCG_TARGET_HAS_shv_vec 0 | ||
20 | +#define TCG_TARGET_HAS_shv_vec 1 | ||
21 | #define TCG_TARGET_HAS_cmp_vec 1 | ||
22 | #define TCG_TARGET_HAS_mul_vec 0 | ||
23 | #define TCG_TARGET_HAS_sat_vec 1 | ||
24 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/tcg/ppc/tcg-target.inc.c | ||
27 | +++ b/tcg/ppc/tcg-target.inc.c | ||
28 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
29 | #define VCMPGTUH VX4(582) | ||
30 | #define VCMPGTUW VX4(646) | ||
31 | |||
32 | +#define VSLB VX4(260) | ||
33 | +#define VSLH VX4(324) | ||
34 | +#define VSLW VX4(388) | ||
35 | +#define VSRB VX4(516) | ||
36 | +#define VSRH VX4(580) | ||
37 | +#define VSRW VX4(644) | ||
38 | +#define VSRAB VX4(772) | ||
39 | +#define VSRAH VX4(836) | ||
40 | +#define VSRAW VX4(900) | ||
41 | + | 20 | + |
42 | #define VAND VX4(1028) | 21 | +#ifndef AARCH64_STORE_INSERT_AL16_H |
43 | #define VANDC VX4(1092) | 22 | +#define AARCH64_STORE_INSERT_AL16_H |
44 | #define VNOR VX4(1284) | 23 | + |
45 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | 24 | +/** |
46 | case INDEX_op_sssub_vec: | 25 | + * store_atom_insert_al16: |
47 | case INDEX_op_usadd_vec: | 26 | + * @p: host address |
48 | case INDEX_op_ussub_vec: | 27 | + * @val: shifted value to store |
49 | + case INDEX_op_shlv_vec: | 28 | + * @msk: mask for value to store |
50 | + case INDEX_op_shrv_vec: | 29 | + * |
51 | + case INDEX_op_sarv_vec: | 30 | + * Atomically store @val to @p masked by @msk. |
52 | return vece <= MO_32; | 31 | + */ |
53 | case INDEX_op_cmp_vec: | 32 | +static inline void ATTRIBUTE_ATOMIC128_OPT |
54 | + case INDEX_op_shli_vec: | 33 | +store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk) |
55 | + case INDEX_op_shri_vec: | ||
56 | + case INDEX_op_sari_vec: | ||
57 | return vece <= MO_32 ? -1 : 0; | ||
58 | default: | ||
59 | return 0; | ||
60 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
61 | umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 }, | ||
62 | smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 }, | ||
63 | umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 }, | ||
64 | - smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }; | ||
65 | + smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }, | ||
66 | + shlv_op[4] = { VSLB, VSLH, VSLW, 0 }, | ||
67 | + shrv_op[4] = { VSRB, VSRH, VSRW, 0 }, | ||
68 | + sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 }; | ||
69 | |||
70 | TCGType type = vecl + TCG_TYPE_V64; | ||
71 | TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | ||
72 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
73 | case INDEX_op_umax_vec: | ||
74 | insn = umax_op[vece]; | ||
75 | break; | ||
76 | + case INDEX_op_shlv_vec: | ||
77 | + insn = shlv_op[vece]; | ||
78 | + break; | ||
79 | + case INDEX_op_shrv_vec: | ||
80 | + insn = shrv_op[vece]; | ||
81 | + break; | ||
82 | + case INDEX_op_sarv_vec: | ||
83 | + insn = sarv_op[vece]; | ||
84 | + break; | ||
85 | case INDEX_op_and_vec: | ||
86 | insn = VAND; | ||
87 | break; | ||
88 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
89 | tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2)); | ||
90 | } | ||
91 | |||
92 | +static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0, | ||
93 | + TCGv_vec v1, TCGArg imm, TCGOpcode opci) | ||
94 | +{ | 34 | +{ |
95 | + TCGv_vec t1 = tcg_temp_new_vec(type); | 35 | + /* |
36 | + * GCC only implements __sync* primitives for int128 on aarch64. | ||
37 | + * We can do better without the barriers, and integrating the | ||
38 | + * arithmetic into the load-exclusive/store-conditional pair. | ||
39 | + */ | ||
40 | + uint64_t tl, th, vl, vh, ml, mh; | ||
41 | + uint32_t fail; | ||
96 | + | 42 | + |
97 | + /* Splat w/bytes for xxspltib. */ | 43 | + qemu_build_assert(!HOST_BIG_ENDIAN); |
98 | + tcg_gen_dupi_vec(MO_8, t1, imm & ((8 << vece) - 1)); | 44 | + vl = int128_getlo(val); |
99 | + vec_gen_3(opci, type, vece, tcgv_vec_arg(v0), | 45 | + vh = int128_gethi(val); |
100 | + tcgv_vec_arg(v1), tcgv_vec_arg(t1)); | 46 | + ml = int128_getlo(msk); |
101 | + tcg_temp_free_vec(t1); | 47 | + mh = int128_gethi(msk); |
48 | + | ||
49 | + asm("0: ldxp %[l], %[h], %[mem]\n\t" | ||
50 | + "bic %[l], %[l], %[ml]\n\t" | ||
51 | + "bic %[h], %[h], %[mh]\n\t" | ||
52 | + "orr %[l], %[l], %[vl]\n\t" | ||
53 | + "orr %[h], %[h], %[vh]\n\t" | ||
54 | + "stxp %w[f], %[l], %[h], %[mem]\n\t" | ||
55 | + "cbnz %w[f], 0b\n" | ||
56 | + : [mem] "+Q"(*ps), [f] "=&r"(fail), [l] "=&r"(tl), [h] "=&r"(th) | ||
57 | + : [vl] "r"(vl), [vh] "r"(vh), [ml] "r"(ml), [mh] "r"(mh)); | ||
102 | +} | 58 | +} |
103 | + | 59 | + |
104 | static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, | 60 | +#endif /* AARCH64_STORE_INSERT_AL16_H */ |
105 | TCGv_vec v1, TCGv_vec v2, TCGCond cond) | ||
106 | { | ||
107 | @@ -XXX,XX +XXX,XX @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, | ||
108 | { | ||
109 | va_list va; | ||
110 | TCGv_vec v0, v1, v2; | ||
111 | + TCGArg a2; | ||
112 | |||
113 | va_start(va, a0); | ||
114 | v0 = temp_tcgv_vec(arg_temp(a0)); | ||
115 | v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); | ||
116 | - v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); | ||
117 | + a2 = va_arg(va, TCGArg); | ||
118 | |||
119 | switch (opc) { | ||
120 | + case INDEX_op_shli_vec: | ||
121 | + expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec); | ||
122 | + break; | ||
123 | + case INDEX_op_shri_vec: | ||
124 | + expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec); | ||
125 | + break; | ||
126 | + case INDEX_op_sari_vec: | ||
127 | + expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec); | ||
128 | + break; | ||
129 | case INDEX_op_cmp_vec: | ||
130 | + v2 = temp_tcgv_vec(arg_temp(a2)); | ||
131 | expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); | ||
132 | break; | ||
133 | default: | ||
134 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
135 | case INDEX_op_smin_vec: | ||
136 | case INDEX_op_umax_vec: | ||
137 | case INDEX_op_umin_vec: | ||
138 | + case INDEX_op_shlv_vec: | ||
139 | + case INDEX_op_shrv_vec: | ||
140 | + case INDEX_op_sarv_vec: | ||
141 | return &v_v_v; | ||
142 | case INDEX_op_not_vec: | ||
143 | case INDEX_op_dup_vec: | ||
144 | -- | 61 | -- |
145 | 2.17.1 | 62 | 2.34.1 |
146 | |||
147 | diff view generated by jsdifflib |
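
The aarch64 store_atom_insert_al16 above is a masked read-modify-write carried out inside a single load-exclusive/store-conditional pair. For readers less used to LL/SC assembly, the same operation can be written, hypothetically and at 64-bit width only, as a portable compare-and-swap loop with the GCC/Clang __atomic builtins (an illustration, not the QEMU implementation):

    #include <stdint.h>
    #include <stdbool.h>

    /*
     * Hypothetical 64-bit analogue of store_atom_insert_al16: atomically
     * replace the bits of *p selected by msk with the corresponding bits
     * of val, leaving the other bits unchanged.  The real helper assumes
     * val is already confined to msk, so it can OR val in directly.
     */
    static void store_insert_masked64(uint64_t *p, uint64_t val, uint64_t msk)
    {
        uint64_t old = __atomic_load_n(p, __ATOMIC_RELAXED);
        uint64_t new_val;

        do {
            new_val = (old & ~msk) | (val & msk);
        } while (!__atomic_compare_exchange_n(p, &old, new_val, false,
                                              __ATOMIC_RELAXED, __ATOMIC_RELAXED));
    }

The LL/SC form in the patch folds the masking directly into the exclusive pair and needs no value comparison at all, which is the "better without the barriers" point made in the comment above.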
1 | Altivec supports 32 128-bit vector registers, whose names are | 1 | The last use was removed by e77c89fb086a. |
---|---|---|---|
2 | by convention v0 through v31. | ||
3 | 2 | ||
3 | Fixes: e77c89fb086a ("cputlb: Remove static tlb sizing") | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | --- | 6 | --- |
7 | tcg/ppc/tcg-target.h | 11 ++++- | 7 | tcg/aarch64/tcg-target.h | 1 - |
8 | tcg/ppc/tcg-target.inc.c | 88 +++++++++++++++++++++++++--------------- | 8 | tcg/arm/tcg-target.h | 1 - |
9 | 2 files changed, 65 insertions(+), 34 deletions(-) | 9 | tcg/i386/tcg-target.h | 1 - |
10 | tcg/mips/tcg-target.h | 1 - | ||
11 | tcg/ppc/tcg-target.h | 1 - | ||
12 | tcg/riscv/tcg-target.h | 1 - | ||
13 | tcg/s390x/tcg-target.h | 1 - | ||
14 | tcg/sparc64/tcg-target.h | 1 - | ||
15 | tcg/tci/tcg-target.h | 1 - | ||
16 | 9 files changed, 9 deletions(-) | ||
10 | 17 | ||
18 | diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/tcg/aarch64/tcg-target.h | ||
21 | +++ b/tcg/aarch64/tcg-target.h | ||
22 | @@ -XXX,XX +XXX,XX @@ | ||
23 | #include "host/cpuinfo.h" | ||
24 | |||
25 | #define TCG_TARGET_INSN_UNIT_SIZE 4 | ||
26 | -#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24 | ||
27 | #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) | ||
28 | |||
29 | typedef enum { | ||
30 | diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/tcg/arm/tcg-target.h | ||
33 | +++ b/tcg/arm/tcg-target.h | ||
34 | @@ -XXX,XX +XXX,XX @@ extern int arm_arch; | ||
35 | #define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7) | ||
36 | |||
37 | #define TCG_TARGET_INSN_UNIT_SIZE 4 | ||
38 | -#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 | ||
39 | #define MAX_CODE_GEN_BUFFER_SIZE UINT32_MAX | ||
40 | |||
41 | typedef enum { | ||
42 | diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/tcg/i386/tcg-target.h | ||
45 | +++ b/tcg/i386/tcg-target.h | ||
46 | @@ -XXX,XX +XXX,XX @@ | ||
47 | #include "host/cpuinfo.h" | ||
48 | |||
49 | #define TCG_TARGET_INSN_UNIT_SIZE 1 | ||
50 | -#define TCG_TARGET_TLB_DISPLACEMENT_BITS 31 | ||
51 | |||
52 | #ifdef __x86_64__ | ||
53 | # define TCG_TARGET_REG_BITS 64 | ||
54 | diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/tcg/mips/tcg-target.h | ||
57 | +++ b/tcg/mips/tcg-target.h | ||
58 | @@ -XXX,XX +XXX,XX @@ | ||
59 | #endif | ||
60 | |||
61 | #define TCG_TARGET_INSN_UNIT_SIZE 4 | ||
62 | -#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 | ||
63 | #define TCG_TARGET_NB_REGS 32 | ||
64 | |||
65 | #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) | ||
11 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | 66 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h |
12 | index XXXXXXX..XXXXXXX 100644 | 67 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/tcg/ppc/tcg-target.h | 68 | --- a/tcg/ppc/tcg-target.h |
14 | +++ b/tcg/ppc/tcg-target.h | 69 | +++ b/tcg/ppc/tcg-target.h |
15 | @@ -XXX,XX +XXX,XX @@ | 70 | @@ -XXX,XX +XXX,XX @@ |
16 | # define TCG_TARGET_REG_BITS 32 | 71 | |
17 | #endif | 72 | #define TCG_TARGET_NB_REGS 64 |
18 | |||
19 | -#define TCG_TARGET_NB_REGS 32 | ||
20 | +#define TCG_TARGET_NB_REGS 64 | ||
21 | #define TCG_TARGET_INSN_UNIT_SIZE 4 | 73 | #define TCG_TARGET_INSN_UNIT_SIZE 4 |
22 | #define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 | 74 | -#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 |
23 | 75 | ||
24 | @@ -XXX,XX +XXX,XX @@ typedef enum { | 76 | typedef enum { |
25 | TCG_REG_R24, TCG_REG_R25, TCG_REG_R26, TCG_REG_R27, | 77 | TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3, |
26 | TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31, | 78 | diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h |
27 | |||
28 | + TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3, | ||
29 | + TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7, | ||
30 | + TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11, | ||
31 | + TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15, | ||
32 | + TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, | ||
33 | + TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, | ||
34 | + TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, | ||
35 | + TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31, | ||
36 | + | ||
37 | TCG_REG_CALL_STACK = TCG_REG_R1, | ||
38 | TCG_AREG0 = TCG_REG_R27 | ||
39 | } TCGReg; | ||
40 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
41 | index XXXXXXX..XXXXXXX 100644 | 79 | index XXXXXXX..XXXXXXX 100644 |
42 | --- a/tcg/ppc/tcg-target.inc.c | 80 | --- a/tcg/riscv/tcg-target.h |
43 | +++ b/tcg/ppc/tcg-target.inc.c | 81 | +++ b/tcg/riscv/tcg-target.h |
44 | @@ -XXX,XX +XXX,XX @@ | 82 | @@ -XXX,XX +XXX,XX @@ |
45 | # define TCG_REG_TMP1 TCG_REG_R12 | 83 | #define TCG_TARGET_REG_BITS 64 |
46 | #endif | 84 | |
47 | 85 | #define TCG_TARGET_INSN_UNIT_SIZE 4 | |
48 | +#define TCG_VEC_TMP1 TCG_REG_V0 | 86 | -#define TCG_TARGET_TLB_DISPLACEMENT_BITS 20 |
49 | +#define TCG_VEC_TMP2 TCG_REG_V1 | 87 | #define TCG_TARGET_NB_REGS 32 |
50 | + | 88 | #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) |
51 | #define TCG_REG_TB TCG_REG_R31 | 89 | |
52 | #define USE_REG_TB (TCG_TARGET_REG_BITS == 64) | 90 | diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h |
53 | 91 | index XXXXXXX..XXXXXXX 100644 | |
54 | @@ -XXX,XX +XXX,XX @@ bool have_isa_3_00; | 92 | --- a/tcg/s390x/tcg-target.h |
55 | #endif | 93 | +++ b/tcg/s390x/tcg-target.h |
56 | 94 | @@ -XXX,XX +XXX,XX @@ | |
57 | #ifdef CONFIG_DEBUG_TCG | 95 | #define S390_TCG_TARGET_H |
58 | -static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { | 96 | |
59 | - "r0", | 97 | #define TCG_TARGET_INSN_UNIT_SIZE 2 |
60 | - "r1", | 98 | -#define TCG_TARGET_TLB_DISPLACEMENT_BITS 19 |
61 | - "r2", | 99 | |
62 | - "r3", | 100 | /* We have a +- 4GB range on the branches; leave some slop. */ |
63 | - "r4", | 101 | #define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB) |
64 | - "r5", | 102 | diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h |
65 | - "r6", | 103 | index XXXXXXX..XXXXXXX 100644 |
66 | - "r7", | 104 | --- a/tcg/sparc64/tcg-target.h |
67 | - "r8", | 105 | +++ b/tcg/sparc64/tcg-target.h |
68 | - "r9", | 106 | @@ -XXX,XX +XXX,XX @@ |
69 | - "r10", | 107 | #define SPARC_TCG_TARGET_H |
70 | - "r11", | 108 | |
71 | - "r12", | 109 | #define TCG_TARGET_INSN_UNIT_SIZE 4 |
72 | - "r13", | 110 | -#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32 |
73 | - "r14", | 111 | #define TCG_TARGET_NB_REGS 32 |
74 | - "r15", | 112 | #define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) |
75 | - "r16", | 113 | |
76 | - "r17", | 114 | diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h |
77 | - "r18", | 115 | index XXXXXXX..XXXXXXX 100644 |
78 | - "r19", | 116 | --- a/tcg/tci/tcg-target.h |
79 | - "r20", | 117 | +++ b/tcg/tci/tcg-target.h |
80 | - "r21", | 118 | @@ -XXX,XX +XXX,XX @@ |
81 | - "r22", | 119 | |
82 | - "r23", | 120 | #define TCG_TARGET_INTERPRETER 1 |
83 | - "r24", | 121 | #define TCG_TARGET_INSN_UNIT_SIZE 4 |
84 | - "r25", | 122 | -#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32 |
85 | - "r26", | 123 | #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) |
86 | - "r27", | 124 | |
87 | - "r28", | 125 | #if UINTPTR_MAX == UINT32_MAX |
88 | - "r29", | ||
89 | - "r30", | ||
90 | - "r31" | ||
91 | +static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = { | ||
92 | + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", | ||
93 | + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", | ||
94 | + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", | ||
95 | + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", | ||
96 | + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", | ||
97 | + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", | ||
98 | + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", | ||
99 | + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", | ||
100 | }; | ||
101 | #endif | ||
102 | |||
103 | @@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = { | ||
104 | TCG_REG_R5, | ||
105 | TCG_REG_R4, | ||
106 | TCG_REG_R3, | ||
107 | + | ||
108 | + /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */ | ||
109 | + TCG_REG_V2, /* call clobbered, vectors */ | ||
110 | + TCG_REG_V3, | ||
111 | + TCG_REG_V4, | ||
112 | + TCG_REG_V5, | ||
113 | + TCG_REG_V6, | ||
114 | + TCG_REG_V7, | ||
115 | + TCG_REG_V8, | ||
116 | + TCG_REG_V9, | ||
117 | + TCG_REG_V10, | ||
118 | + TCG_REG_V11, | ||
119 | + TCG_REG_V12, | ||
120 | + TCG_REG_V13, | ||
121 | + TCG_REG_V14, | ||
122 | + TCG_REG_V15, | ||
123 | + TCG_REG_V16, | ||
124 | + TCG_REG_V17, | ||
125 | + TCG_REG_V18, | ||
126 | + TCG_REG_V19, | ||
127 | }; | ||
128 | |||
129 | static const int tcg_target_call_iarg_regs[] = { | ||
130 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
131 | tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11); | ||
132 | tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12); | ||
133 | |||
134 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0); | ||
135 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1); | ||
136 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2); | ||
137 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3); | ||
138 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4); | ||
139 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5); | ||
140 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6); | ||
141 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7); | ||
142 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8); | ||
143 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9); | ||
144 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10); | ||
145 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11); | ||
146 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12); | ||
147 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13); | ||
148 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14); | ||
149 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15); | ||
150 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16); | ||
151 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17); | ||
152 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18); | ||
153 | + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19); | ||
154 | + | ||
155 | s->reserved_regs = 0; | ||
156 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */ | ||
157 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */ | ||
158 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
159 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */ | ||
160 | #endif | ||
161 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */ | ||
162 | + tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1); | ||
163 | + tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2); | ||
164 | if (USE_REG_TB) { | ||
165 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); /* tb->tc_ptr */ | ||
166 | } | ||
167 | -- | 126 | -- |
168 | 2.17.1 | 127 | 2.34.1 |
169 | 128 | ||
170 | 129 | diff view generated by jsdifflib |
1 | Add support for vector saturated add/subtract using Altivec | 1 | Invert the exit code, for use with the testsuite. |
---|---|---|---|
2 | instructions: | ||
3 | VADDSBS, VADDSHS, VADDSWS, VADDUBS, VADDUHS, VADDUWS, and | ||
4 | VSUBSBS, VSUBSHS, VSUBSWS, VSUBUBS, VSUBUHS, VSUBUWS. | ||
5 | 2 | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
8 | --- | 4 | --- |
9 | tcg/ppc/tcg-target.h | 2 +- | 5 | scripts/decodetree.py | 9 +++++++-- |
10 | tcg/ppc/tcg-target.inc.c | 36 ++++++++++++++++++++++++++++++++++++ | 6 | 1 file changed, 7 insertions(+), 2 deletions(-) |
11 | 2 files changed, 37 insertions(+), 1 deletion(-) | ||
12 | 7 | ||
13 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | 8 | diff --git a/scripts/decodetree.py b/scripts/decodetree.py |
14 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/tcg/ppc/tcg-target.h | 10 | --- a/scripts/decodetree.py |
16 | +++ b/tcg/ppc/tcg-target.h | 11 | +++ b/scripts/decodetree.py |
17 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | 12 | @@ -XXX,XX +XXX,XX @@ |
18 | #define TCG_TARGET_HAS_shv_vec 0 | 13 | formats = {} |
19 | #define TCG_TARGET_HAS_cmp_vec 1 | 14 | allpatterns = [] |
20 | #define TCG_TARGET_HAS_mul_vec 0 | 15 | anyextern = False |
21 | -#define TCG_TARGET_HAS_sat_vec 0 | 16 | +testforerror = False |
22 | +#define TCG_TARGET_HAS_sat_vec 1 | 17 | |
23 | #define TCG_TARGET_HAS_minmax_vec 1 | 18 | translate_prefix = 'trans' |
24 | #define TCG_TARGET_HAS_bitsel_vec 0 | 19 | translate_scope = 'static ' |
25 | #define TCG_TARGET_HAS_cmpsel_vec 0 | 20 | @@ -XXX,XX +XXX,XX @@ def error_with_file(file, lineno, *args): |
26 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 21 | if output_file and output_fd: |
27 | index XXXXXXX..XXXXXXX 100644 | 22 | output_fd.close() |
28 | --- a/tcg/ppc/tcg-target.inc.c | 23 | os.remove(output_file) |
29 | +++ b/tcg/ppc/tcg-target.inc.c | 24 | - exit(1) |
30 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 25 | + exit(0 if testforerror else 1) |
31 | #define STVX XO31(231) | 26 | # end error_with_file |
32 | #define STVEWX XO31(199) | 27 | |
33 | 28 | ||
34 | +#define VADDSBS VX4(768) | 29 | @@ -XXX,XX +XXX,XX @@ def main(): |
35 | +#define VADDUBS VX4(512) | 30 | global bitop_width |
36 | #define VADDUBM VX4(0) | 31 | global variablewidth |
37 | +#define VADDSHS VX4(832) | 32 | global anyextern |
38 | +#define VADDUHS VX4(576) | 33 | + global testforerror |
39 | #define VADDUHM VX4(64) | 34 | |
40 | +#define VADDSWS VX4(896) | 35 | decode_scope = 'static ' |
41 | +#define VADDUWS VX4(640) | 36 | |
42 | #define VADDUWM VX4(128) | 37 | long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=', |
43 | 38 | - 'static-decode=', 'varinsnwidth='] | |
44 | +#define VSUBSBS VX4(1792) | 39 | + 'static-decode=', 'varinsnwidth=', 'test-for-error'] |
45 | +#define VSUBUBS VX4(1536) | 40 | try: |
46 | #define VSUBUBM VX4(1024) | 41 | (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts) |
47 | +#define VSUBSHS VX4(1856) | 42 | except getopt.GetoptError as err: |
48 | +#define VSUBUHS VX4(1600) | 43 | @@ -XXX,XX +XXX,XX @@ def main(): |
49 | #define VSUBUHM VX4(1088) | 44 | bitop_width = 64 |
50 | +#define VSUBSWS VX4(1920) | 45 | elif insnwidth != 32: |
51 | +#define VSUBUWS VX4(1664) | 46 | error(0, 'cannot handle insns of width', insnwidth) |
52 | #define VSUBUWM VX4(1152) | 47 | + elif o == '--test-for-error': |
53 | 48 | + testforerror = True | |
54 | #define VMAXSB VX4(258) | 49 | else: |
55 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | 50 | assert False, 'unhandled option' |
56 | case INDEX_op_smin_vec: | 51 | |
57 | case INDEX_op_umax_vec: | 52 | @@ -XXX,XX +XXX,XX @@ def main(): |
58 | case INDEX_op_umin_vec: | 53 | |
59 | + case INDEX_op_ssadd_vec: | 54 | if output_file: |
60 | + case INDEX_op_sssub_vec: | 55 | output_fd.close() |
61 | + case INDEX_op_usadd_vec: | 56 | + exit(1 if testforerror else 0) |
62 | + case INDEX_op_ussub_vec: | 57 | # end main |
63 | return vece <= MO_32; | 58 | |
64 | case INDEX_op_cmp_vec: | 59 | |
65 | return vece <= MO_32 ? -1 : 0; | ||
66 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
67 | eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, | ||
68 | gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, | ||
69 | gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }, | ||
70 | + ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 }, | ||
71 | + usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 }, | ||
72 | + sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 }, | ||
73 | + ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 }, | ||
74 | umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 }, | ||
75 | smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 }, | ||
76 | umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 }, | ||
77 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
78 | case INDEX_op_sub_vec: | ||
79 | insn = sub_op[vece]; | ||
80 | break; | ||
81 | + case INDEX_op_ssadd_vec: | ||
82 | + insn = ssadd_op[vece]; | ||
83 | + break; | ||
84 | + case INDEX_op_sssub_vec: | ||
85 | + insn = sssub_op[vece]; | ||
86 | + break; | ||
87 | + case INDEX_op_usadd_vec: | ||
88 | + insn = usadd_op[vece]; | ||
89 | + break; | ||
90 | + case INDEX_op_ussub_vec: | ||
91 | + insn = ussub_op[vece]; | ||
92 | + break; | ||
93 | case INDEX_op_smin_vec: | ||
94 | insn = smin_op[vece]; | ||
95 | break; | ||
96 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
97 | case INDEX_op_andc_vec: | ||
98 | case INDEX_op_orc_vec: | ||
99 | case INDEX_op_cmp_vec: | ||
100 | + case INDEX_op_ssadd_vec: | ||
101 | + case INDEX_op_sssub_vec: | ||
102 | + case INDEX_op_usadd_vec: | ||
103 | + case INDEX_op_ussub_vec: | ||
104 | case INDEX_op_smax_vec: | ||
105 | case INDEX_op_smin_vec: | ||
106 | case INDEX_op_umax_vec: | ||
107 | -- | 60 | -- |
108 | 2.17.1 | 61 | 2.34.1 |
109 | |||
110 | diff view generated by jsdifflib |
1 | Add support for vector add/subtract using Altivec instructions: | 1 | Two copy-paste errors walking the parse tree. |
---|---|---|---|
2 | VADDUBM, VADDUHM, VADDUWM, VSUBUBM, VSUBUHM, VSUBUWM. | ||
3 | 2 | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | --- | 4 | --- |
7 | tcg/ppc/tcg-target.inc.c | 20 ++++++++++++++++++++ | 5 | scripts/decodetree.py | 4 ++-- |
8 | 1 file changed, 20 insertions(+) | 6 | 1 file changed, 2 insertions(+), 2 deletions(-) |
9 | 7 | ||
10 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 8 | diff --git a/scripts/decodetree.py b/scripts/decodetree.py |
11 | index XXXXXXX..XXXXXXX 100644 | 9 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/tcg/ppc/tcg-target.inc.c | 10 | --- a/scripts/decodetree.py |
13 | +++ b/tcg/ppc/tcg-target.inc.c | 11 | +++ b/scripts/decodetree.py |
14 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 12 | @@ -XXX,XX +XXX,XX @@ def build_tree(self): |
15 | #define STVX XO31(231) | 13 | |
16 | #define STVEWX XO31(199) | 14 | def prop_format(self): |
17 | 15 | for p in self.pats: | |
18 | +#define VADDUBM VX4(0) | 16 | - p.build_tree() |
19 | +#define VADDUHM VX4(64) | 17 | + p.prop_format() |
20 | +#define VADDUWM VX4(128) | 18 | |
21 | + | 19 | def prop_width(self): |
22 | +#define VSUBUBM VX4(1024) | 20 | width = None |
23 | +#define VSUBUHM VX4(1088) | 21 | @@ -XXX,XX +XXX,XX @@ def __build_tree(pats, outerbits, outermask): |
24 | +#define VSUBUWM VX4(1152) | 22 | return t |
25 | + | 23 | |
26 | #define VMAXSB VX4(258) | 24 | def build_tree(self): |
27 | #define VMAXSH VX4(322) | 25 | - super().prop_format() |
28 | #define VMAXSW VX4(386) | 26 | + super().build_tree() |
29 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | 27 | self.tree = self.__build_tree(self.pats, self.fixedbits, |
30 | case INDEX_op_andc_vec: | 28 | self.fixedmask) |
31 | case INDEX_op_not_vec: | 29 | |
32 | return 1; | ||
33 | + case INDEX_op_add_vec: | ||
34 | + case INDEX_op_sub_vec: | ||
35 | case INDEX_op_smax_vec: | ||
36 | case INDEX_op_smin_vec: | ||
37 | case INDEX_op_umax_vec: | ||
38 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
39 | const TCGArg *args, const int *const_args) | ||
40 | { | ||
41 | static const uint32_t | ||
42 | + add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 }, | ||
43 | + sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 }, | ||
44 | eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, | ||
45 | gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, | ||
46 | gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }, | ||
47 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
48 | tcg_out_dupm_vec(s, type, vece, a0, a1, a2); | ||
49 | return; | ||
50 | |||
51 | + case INDEX_op_add_vec: | ||
52 | + insn = add_op[vece]; | ||
53 | + break; | ||
54 | + case INDEX_op_sub_vec: | ||
55 | + insn = sub_op[vece]; | ||
56 | + break; | ||
57 | case INDEX_op_smin_vec: | ||
58 | insn = smin_op[vece]; | ||
59 | break; | ||
60 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
61 | return (TCG_TARGET_REG_BITS == 64 ? &S_S | ||
62 | : TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S); | ||
63 | |||
64 | + case INDEX_op_add_vec: | ||
65 | + case INDEX_op_sub_vec: | ||
66 | case INDEX_op_and_vec: | ||
67 | case INDEX_op_or_vec: | ||
68 | case INDEX_op_xor_vec: | ||
69 | -- | 30 | -- |
70 | 2.17.1 | 31 | 2.34.1 |
71 | |||
72 | diff view generated by jsdifflib |
1 | Add support for vector maximum/minimum using Altivec instructions | 1 | Test err_pattern_group_empty.decode failed with exception: |
---|---|---|---|
2 | VMAXSB, VMAXSH, VMAXSW, VMAXUB, VMAXUH, VMAXUW, and | 2 | |
3 | VMINSB, VMINSH, VMINSW, VMINUB, VMINUH, VMINUW. | 3 | Traceback (most recent call last): |
4 | File "./scripts/decodetree.py", line 1424, in <module> main() | ||
5 | File "./scripts/decodetree.py", line 1342, in main toppat.build_tree() | ||
6 | File "./scripts/decodetree.py", line 627, in build_tree | ||
7 | self.tree = self.__build_tree(self.pats, self.fixedbits, | ||
8 | File "./scripts/decodetree.py", line 607, in __build_tree | ||
9 | fb = i.fixedbits & innermask | ||
10 | TypeError: unsupported operand type(s) for &: 'NoneType' and 'int' | ||
4 | 11 | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 12 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
7 | --- | 13 | --- |
8 | tcg/ppc/tcg-target.h | 2 +- | 14 | scripts/decodetree.py | 6 ++++++ |
9 | tcg/ppc/tcg-target.inc.c | 40 +++++++++++++++++++++++++++++++++++++++- | 15 | 1 file changed, 6 insertions(+) |
10 | 2 files changed, 40 insertions(+), 2 deletions(-) | ||
11 | 16 | ||
12 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | 17 | diff --git a/scripts/decodetree.py b/scripts/decodetree.py |
13 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/tcg/ppc/tcg-target.h | 19 | --- a/scripts/decodetree.py |
15 | +++ b/tcg/ppc/tcg-target.h | 20 | +++ b/scripts/decodetree.py |
16 | @@ -XXX,XX +XXX,XX @@ extern bool have_altivec; | 21 | @@ -XXX,XX +XXX,XX @@ def output_code(self, i, extracted, outerbits, outermask): |
17 | #define TCG_TARGET_HAS_cmp_vec 1 | 22 | output(ind, '}\n') |
18 | #define TCG_TARGET_HAS_mul_vec 0 | 23 | else: |
19 | #define TCG_TARGET_HAS_sat_vec 0 | 24 | p.output_code(i, extracted, p.fixedbits, p.fixedmask) |
20 | -#define TCG_TARGET_HAS_minmax_vec 0 | ||
21 | +#define TCG_TARGET_HAS_minmax_vec 1 | ||
22 | #define TCG_TARGET_HAS_bitsel_vec 0 | ||
23 | #define TCG_TARGET_HAS_cmpsel_vec 0 | ||
24 | |||
25 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/tcg/ppc/tcg-target.inc.c | ||
28 | +++ b/tcg/ppc/tcg-target.inc.c | ||
29 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | ||
30 | #define STVX XO31(231) | ||
31 | #define STVEWX XO31(199) | ||
32 | |||
33 | +#define VMAXSB VX4(258) | ||
34 | +#define VMAXSH VX4(322) | ||
35 | +#define VMAXSW VX4(386) | ||
36 | +#define VMAXUB VX4(2) | ||
37 | +#define VMAXUH VX4(66) | ||
38 | +#define VMAXUW VX4(130) | ||
39 | +#define VMINSB VX4(770) | ||
40 | +#define VMINSH VX4(834) | ||
41 | +#define VMINSW VX4(898) | ||
42 | +#define VMINUB VX4(514) | ||
43 | +#define VMINUH VX4(578) | ||
44 | +#define VMINUW VX4(642) | ||
45 | + | 25 | + |
46 | #define VCMPEQUB VX4(6) | 26 | + def build_tree(self): |
47 | #define VCMPEQUH VX4(70) | 27 | + if not self.pats: |
48 | #define VCMPEQUW VX4(134) | 28 | + error_with_file(self.file, self.lineno, 'empty pattern group') |
49 | @@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) | 29 | + super().build_tree() |
50 | case INDEX_op_andc_vec: | 30 | + |
51 | case INDEX_op_not_vec: | 31 | #end IncMultiPattern |
52 | return 1; | 32 | |
53 | + case INDEX_op_smax_vec: | 33 | |
54 | + case INDEX_op_smin_vec: | ||
55 | + case INDEX_op_umax_vec: | ||
56 | + case INDEX_op_umin_vec: | ||
57 | + return vece <= MO_32; | ||
58 | case INDEX_op_cmp_vec: | ||
59 | return vece <= MO_32 ? -1 : 0; | ||
60 | default: | ||
61 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
62 | static const uint32_t | ||
63 | eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 }, | ||
64 | gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 }, | ||
65 | - gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }; | ||
66 | + gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 }, | ||
67 | + umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 }, | ||
68 | + smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 }, | ||
69 | + umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 }, | ||
70 | + smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 }; | ||
71 | |||
72 | TCGType type = vecl + TCG_TYPE_V64; | ||
73 | TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; | ||
74 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
75 | tcg_out_dupm_vec(s, type, vece, a0, a1, a2); | ||
76 | return; | ||
77 | |||
78 | + case INDEX_op_smin_vec: | ||
79 | + insn = smin_op[vece]; | ||
80 | + break; | ||
81 | + case INDEX_op_umin_vec: | ||
82 | + insn = umin_op[vece]; | ||
83 | + break; | ||
84 | + case INDEX_op_smax_vec: | ||
85 | + insn = smax_op[vece]; | ||
86 | + break; | ||
87 | + case INDEX_op_umax_vec: | ||
88 | + insn = umax_op[vece]; | ||
89 | + break; | ||
90 | case INDEX_op_and_vec: | ||
91 | insn = VAND; | ||
92 | break; | ||
93 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) | ||
94 | case INDEX_op_andc_vec: | ||
95 | case INDEX_op_orc_vec: | ||
96 | case INDEX_op_cmp_vec: | ||
97 | + case INDEX_op_smax_vec: | ||
98 | + case INDEX_op_smin_vec: | ||
99 | + case INDEX_op_umax_vec: | ||
100 | + case INDEX_op_umin_vec: | ||
101 | return &v_v_v; | ||
102 | case INDEX_op_not_vec: | ||
103 | case INDEX_op_dup_vec: | ||
104 | -- | 34 | -- |
105 | 2.17.1 | 35 | 2.34.1 |
106 | |||
107 | diff view generated by jsdifflib |
1 | Introduce macros VRT(), VRA(), VRB(), VRC() used for encoding | 1 | Nor report any PermissionError on remove. |
---|---|---|---|
2 | elements of Altivec instructions. | 2 | The primary purpose is testing with -o /dev/null. |
3 | 3 | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
6 | --- | 5 | --- |
7 | tcg/ppc/tcg-target.inc.c | 5 +++++ | 6 | scripts/decodetree.py | 7 ++++++- |
8 | 1 file changed, 5 insertions(+) | 7 | 1 file changed, 6 insertions(+), 1 deletion(-) |
9 | 8 | ||
10 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 9 | diff --git a/scripts/decodetree.py b/scripts/decodetree.py |
11 | index XXXXXXX..XXXXXXX 100644 | 10 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/tcg/ppc/tcg-target.inc.c | 11 | --- a/scripts/decodetree.py |
13 | +++ b/tcg/ppc/tcg-target.inc.c | 12 | +++ b/scripts/decodetree.py |
14 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 13 | @@ -XXX,XX +XXX,XX @@ def error_with_file(file, lineno, *args): |
15 | #define MB64(b) ((b)<<5) | 14 | |
16 | #define FXM(b) (1 << (19 - (b))) | 15 | if output_file and output_fd: |
17 | 16 | output_fd.close() | |
18 | +#define VRT(r) (((r) & 31) << 21) | 17 | - os.remove(output_file) |
19 | +#define VRA(r) (((r) & 31) << 16) | 18 | + # Do not try to remove e.g. -o /dev/null |
20 | +#define VRB(r) (((r) & 31) << 11) | 19 | + if not output_file.startswith("/dev"): |
21 | +#define VRC(r) (((r) & 31) << 6) | 20 | + try: |
22 | + | 21 | + os.remove(output_file) |
23 | #define LK 1 | 22 | + except PermissionError: |
24 | 23 | + pass | |
25 | #define TAB(t, a, b) (RT(t) | RA(a) | RB(b)) | 24 | exit(0 if testforerror else 1) |
25 | # end error_with_file | ||
26 | |||
26 | -- | 27 | -- |
27 | 2.17.1 | 28 | 2.34.1 |
28 | |||
29 | diff view generated by jsdifflib |
1 | This is identical to have_isa_2_06, so replace it. | ||
---|---|---|---|
2 | |||
3 | Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | --- | 2 | --- |
6 | tcg/ppc/tcg-target.inc.c | 5 ++--- | 3 | tests/decode/check.sh | 24 ---------------- |
7 | 1 file changed, 2 insertions(+), 3 deletions(-) | 4 | tests/decode/meson.build | 59 ++++++++++++++++++++++++++++++++++++++++ |
5 | tests/meson.build | 5 +--- | ||
6 | 3 files changed, 60 insertions(+), 28 deletions(-) | ||
7 | delete mode 100755 tests/decode/check.sh | ||
8 | create mode 100644 tests/decode/meson.build | ||
8 | 9 | ||
9 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 10 | diff --git a/tests/decode/check.sh b/tests/decode/check.sh |
11 | deleted file mode 100755 | ||
12 | index XXXXXXX..XXXXXXX | ||
13 | --- a/tests/decode/check.sh | ||
14 | +++ /dev/null | ||
15 | @@ -XXX,XX +XXX,XX @@ | ||
16 | -#!/bin/sh | ||
17 | -# This work is licensed under the terms of the GNU LGPL, version 2 or later. | ||
18 | -# See the COPYING.LIB file in the top-level directory. | ||
19 | - | ||
20 | -PYTHON=$1 | ||
21 | -DECODETREE=$2 | ||
22 | -E=0 | ||
23 | - | ||
24 | -# All of these tests should produce errors | ||
25 | -for i in err_*.decode; do | ||
26 | - if $PYTHON $DECODETREE $i > /dev/null 2> /dev/null; then | ||
27 | - # Pass, aka failed to fail. | ||
28 | - echo FAIL: $i 1>&2 | ||
29 | - E=1 | ||
30 | - fi | ||
31 | -done | ||
32 | - | ||
33 | -for i in succ_*.decode; do | ||
34 | - if ! $PYTHON $DECODETREE $i > /dev/null 2> /dev/null; then | ||
35 | - echo FAIL:$i 1>&2 | ||
36 | - fi | ||
37 | -done | ||
38 | - | ||
39 | -exit $E | ||
40 | diff --git a/tests/decode/meson.build b/tests/decode/meson.build | ||
41 | new file mode 100644 | ||
42 | index XXXXXXX..XXXXXXX | ||
43 | --- /dev/null | ||
44 | +++ b/tests/decode/meson.build | ||
45 | @@ -XXX,XX +XXX,XX @@ | ||
46 | +err_tests = [ | ||
47 | + 'err_argset1.decode', | ||
48 | + 'err_argset2.decode', | ||
49 | + 'err_field1.decode', | ||
50 | + 'err_field2.decode', | ||
51 | + 'err_field3.decode', | ||
52 | + 'err_field4.decode', | ||
53 | + 'err_field5.decode', | ||
54 | + 'err_field6.decode', | ||
55 | + 'err_init1.decode', | ||
56 | + 'err_init2.decode', | ||
57 | + 'err_init3.decode', | ||
58 | + 'err_init4.decode', | ||
59 | + 'err_overlap1.decode', | ||
60 | + 'err_overlap2.decode', | ||
61 | + 'err_overlap3.decode', | ||
62 | + 'err_overlap4.decode', | ||
63 | + 'err_overlap5.decode', | ||
64 | + 'err_overlap6.decode', | ||
65 | + 'err_overlap7.decode', | ||
66 | + 'err_overlap8.decode', | ||
67 | + 'err_overlap9.decode', | ||
68 | + 'err_pattern_group_empty.decode', | ||
69 | + 'err_pattern_group_ident1.decode', | ||
70 | + 'err_pattern_group_ident2.decode', | ||
71 | + 'err_pattern_group_nest1.decode', | ||
72 | + 'err_pattern_group_nest2.decode', | ||
73 | + 'err_pattern_group_nest3.decode', | ||
74 | + 'err_pattern_group_overlap1.decode', | ||
75 | + 'err_width1.decode', | ||
76 | + 'err_width2.decode', | ||
77 | + 'err_width3.decode', | ||
78 | + 'err_width4.decode', | ||
79 | +] | ||
80 | + | ||
81 | +succ_tests = [ | ||
82 | + 'succ_argset_type1.decode', | ||
83 | + 'succ_function.decode', | ||
84 | + 'succ_ident1.decode', | ||
85 | + 'succ_pattern_group_nest1.decode', | ||
86 | + 'succ_pattern_group_nest2.decode', | ||
87 | + 'succ_pattern_group_nest3.decode', | ||
88 | + 'succ_pattern_group_nest4.decode', | ||
89 | +] | ||
90 | + | ||
91 | +suite = 'decodetree' | ||
92 | +decodetree = find_program(meson.project_source_root() / 'scripts/decodetree.py') | ||
93 | + | ||
94 | +foreach t: err_tests | ||
95 | + test(fs.replace_suffix(t, ''), | ||
96 | + decodetree, args: ['-o', '/dev/null', '--test-for-error', files(t)], | ||
97 | + suite: suite) | ||
98 | +endforeach | ||
99 | + | ||
100 | +foreach t: succ_tests | ||
101 | + test(fs.replace_suffix(t, ''), | ||
102 | + decodetree, args: ['-o', '/dev/null', files(t)], | ||
103 | + suite: suite) | ||
104 | +endforeach | ||
105 | diff --git a/tests/meson.build b/tests/meson.build | ||
10 | index XXXXXXX..XXXXXXX 100644 | 106 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/tcg/ppc/tcg-target.inc.c | 107 | --- a/tests/meson.build |
12 | +++ b/tcg/ppc/tcg-target.inc.c | 108 | +++ b/tests/meson.build |
13 | @@ -XXX,XX +XXX,XX @@ static tcg_insn_unit *tb_ret_addr; | 109 | @@ -XXX,XX +XXX,XX @@ if have_tools and have_vhost_user and 'CONFIG_LINUX' in config_host |
14 | 110 | dependencies: [qemuutil, vhost_user]) | |
15 | TCGPowerISA have_isa; | 111 | endif |
16 | 112 | ||
17 | -#define HAVE_ISA_2_06 have_isa_2_06 | 113 | -test('decodetree', sh, |
18 | #define HAVE_ISEL have_isa_2_06 | 114 | - args: [ files('decode/check.sh'), config_host['PYTHON'], files('../scripts/decodetree.py') ], |
19 | 115 | - workdir: meson.current_source_dir() / 'decode', | |
20 | #ifndef CONFIG_SOFTMMU | 116 | - suite: 'decodetree') |
21 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) | 117 | +subdir('decode') |
22 | } | 118 | |
23 | } else { | 119 | if 'CONFIG_TCG' in config_all |
24 | uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)]; | 120 | subdir('fp') |
25 | - if (!HAVE_ISA_2_06 && insn == LDBRX) { | ||
26 | + if (!have_isa_2_06 && insn == LDBRX) { | ||
27 | tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4)); | ||
28 | tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo)); | ||
29 | tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0)); | ||
30 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) | ||
31 | } | ||
32 | } else { | ||
33 | uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)]; | ||
34 | - if (!HAVE_ISA_2_06 && insn == STDBRX) { | ||
35 | + if (!have_isa_2_06 && insn == STDBRX) { | ||
36 | tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo)); | ||
37 | tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4)); | ||
38 | tcg_out_shri64(s, TCG_REG_R0, datalo, 32); | ||
39 | -- | 121 | -- |
40 | 2.17.1 | 122 | 2.34.1 |
41 | |||
42 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Peter Maydell <peter.maydell@linaro.org> | ||
1 | 2 | ||
3 | Document the named field syntax that we want to implement for the | ||
4 | decodetree script. This allows a field to be defined in terms of | ||
5 | some other field that the instruction pattern has already set, for | ||
6 | example: | ||
7 | |||
8 | %sz_imm 10:3 sz:3 !function=expand_sz_imm | ||
9 | |||
10 | to allow a function to be passed both an immediate field from the | ||
11 | instruction and also a sz value which might have been specified by | ||
12 | the instruction pattern directly (sz=1, etc) rather than being a | ||
13 | simple field within the instruction. | ||
14 | |||
15 | Note that the restriction on not having the format referring to the | ||
16 | pattern and the pattern referring to the format simultaneously is a | ||
17 | restriction of the decoder generator rather than inherently being a | ||
18 | silly thing to do. | ||
19 | |||
20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
22 | Message-Id: <20230523120447.728365-3-peter.maydell@linaro.org> | ||
23 | --- | ||
24 | docs/devel/decodetree.rst | 33 ++++++++++++++++++++++++++++----- | ||
25 | 1 file changed, 28 insertions(+), 5 deletions(-) | ||
26 | |||
27 | diff --git a/docs/devel/decodetree.rst b/docs/devel/decodetree.rst | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/docs/devel/decodetree.rst | ||
30 | +++ b/docs/devel/decodetree.rst | ||
31 | @@ -XXX,XX +XXX,XX @@ Fields | ||
32 | |||
33 | Syntax:: | ||
34 | |||
35 | - field_def := '%' identifier ( unnamed_field )* ( !function=identifier )? | ||
36 | + field_def := '%' identifier ( field )* ( !function=identifier )? | ||
37 | + field := unnamed_field | named_field | ||
38 | unnamed_field := number ':' ( 's' ) number | ||
39 | + named_field := identifier ':' ( 's' ) number | ||
40 | |||
41 | For *unnamed_field*, the first number is the least-significant bit position | ||
42 | of the field and the second number is the length of the field. If the 's' is | ||
43 | -present, the field is considered signed. If multiple ``unnamed_fields`` are | ||
44 | -present, they are concatenated. In this way one can define disjoint fields. | ||
45 | +present, the field is considered signed. | ||
46 | + | ||
47 | +A *named_field* refers to some other field in the instruction pattern | ||
48 | +or format. Regardless of the length of the other field where it is | ||
49 | +defined, it will be inserted into this field with the specified | ||
50 | +signedness and bit width. | ||
51 | + | ||
52 | +Field definitions that involve loops (i.e. where a field is defined | ||
53 | +directly or indirectly in terms of itself) are errors. | ||
54 | + | ||
55 | +A format can include fields that refer to named fields that are | ||
56 | +defined in the instruction pattern(s) that use the format. | ||
57 | +Conversely, an instruction pattern can include fields that refer to | ||
58 | +named fields that are defined in the format it uses. However you | ||
59 | +cannot currently do both at once (i.e. pattern P uses format F; F has | ||
60 | +a field A that refers to a named field B that is defined in P, and P | ||
61 | +has a field C that refers to a named field D that is defined in F). | ||
62 | + | ||
63 | +If multiple ``fields`` are present, they are concatenated. | ||
64 | +In this way one can define disjoint fields. | ||
65 | |||
66 | If ``!function`` is specified, the concatenated result is passed through the | ||
67 | named function, taking and returning an integral value. | ||
68 | |||
69 | -One may use ``!function`` with zero ``unnamed_fields``. This case is called | ||
70 | +One may use ``!function`` with zero ``fields``. This case is called | ||
71 | a *parameter*, and the named function is only passed the ``DisasContext`` | ||
72 | and returns an integral value extracted from there. | ||
73 | |||
74 | -A field with no ``unnamed_fields`` and no ``!function`` is in error. | ||
75 | +A field with no ``fields`` and no ``!function`` is in error. | ||
76 | |||
77 | Field examples: | ||
78 | |||
79 | @@ -XXX,XX +XXX,XX @@ Field examples: | ||
80 | | %shimm8 5:s8 13:1 | expand_shimm8(sextract(i, 5, 8) << 1 | | | ||
81 | | !function=expand_shimm8 | extract(i, 13, 1)) | | ||
82 | +---------------------------+---------------------------------------------+ | ||
83 | +| %sz_imm 10:2 sz:3 | expand_sz_imm(extract(i, 10, 2) << 3 | | | ||
84 | +| !function=expand_sz_imm | extract(a->sz, 0, 3)) | | ||
85 | ++---------------------------+---------------------------------------------+ | ||
86 | |||
87 | Argument Sets | ||
88 | ============= | ||
89 | -- | ||
90 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Peter Maydell <peter.maydell@linaro.org> | ||
1 | 2 | ||
3 | To support referring to other named fields in field definitions, we | ||
4 | need to pass the str_extract() method a function which tells it how | ||
5 | to emit the code for a previously initialized named field. (In | ||
6 | Pattern::output_code() the other field will be "u.f_foo.field", and | ||
7 | in Format::output_extract() it is "a->field".) | ||
8 | |||
9 | Refactor the two callsites that currently do "output code to | ||
10 | initialize each field", and have them pass a lambda that defines how | ||
11 | to format the lvalue in each case. This is then used both in | ||
12 | emitting the LHS of the assignment and also passed down to | ||
13 | str_extract() as a new argument (unused at the moment, but will be | ||
14 | used in the following patch). | ||
15 | |||
16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
17 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
18 | Message-Id: <20230523120447.728365-4-peter.maydell@linaro.org> | ||
19 | --- | ||
20 | scripts/decodetree.py | 26 +++++++++++++++----------- | ||
21 | 1 file changed, 15 insertions(+), 11 deletions(-) | ||
22 | |||
23 | diff --git a/scripts/decodetree.py b/scripts/decodetree.py | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/scripts/decodetree.py | ||
26 | +++ b/scripts/decodetree.py | ||
27 | @@ -XXX,XX +XXX,XX @@ def __str__(self): | ||
28 | s = '' | ||
29 | return str(self.pos) + ':' + s + str(self.len) | ||
30 | |||
31 | - def str_extract(self): | ||
32 | + def str_extract(self, lvalue_formatter): | ||
33 | global bitop_width | ||
34 | s = 's' if self.sign else '' | ||
35 | return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})' | ||
36 | @@ -XXX,XX +XXX,XX @@ def __init__(self, subs, mask): | ||
37 | def __str__(self): | ||
38 | return str(self.subs) | ||
39 | |||
40 | - def str_extract(self): | ||
41 | + def str_extract(self, lvalue_formatter): | ||
42 | global bitop_width | ||
43 | ret = '0' | ||
44 | pos = 0 | ||
45 | for f in reversed(self.subs): | ||
46 | - ext = f.str_extract() | ||
47 | + ext = f.str_extract(lvalue_formatter) | ||
48 | if pos == 0: | ||
49 | ret = ext | ||
50 | else: | ||
51 | @@ -XXX,XX +XXX,XX @@ def __init__(self, value): | ||
52 | def __str__(self): | ||
53 | return str(self.value) | ||
54 | |||
55 | - def str_extract(self): | ||
56 | + def str_extract(self, lvalue_formatter): | ||
57 | return str(self.value) | ||
58 | |||
59 | def __cmp__(self, other): | ||
60 | @@ -XXX,XX +XXX,XX @@ def __init__(self, func, base): | ||
61 | def __str__(self): | ||
62 | return self.func + '(' + str(self.base) + ')' | ||
63 | |||
64 | - def str_extract(self): | ||
65 | - return self.func + '(ctx, ' + self.base.str_extract() + ')' | ||
66 | + def str_extract(self, lvalue_formatter): | ||
67 | + return (self.func + '(ctx, ' | ||
68 | + + self.base.str_extract(lvalue_formatter) + ')') | ||
69 | |||
70 | def __eq__(self, other): | ||
71 | return self.func == other.func and self.base == other.base | ||
72 | @@ -XXX,XX +XXX,XX @@ def __init__(self, func): | ||
73 | def __str__(self): | ||
74 | return self.func | ||
75 | |||
76 | - def str_extract(self): | ||
77 | + def str_extract(self, lvalue_formatter): | ||
78 | return self.func + '(ctx)' | ||
79 | |||
80 | def __eq__(self, other): | ||
81 | @@ -XXX,XX +XXX,XX @@ def __str__(self): | ||
82 | |||
83 | def str1(self, i): | ||
84 | return str_indent(i) + self.__str__() | ||
85 | + | ||
86 | + def output_fields(self, indent, lvalue_formatter): | ||
87 | + for n, f in self.fields.items(): | ||
88 | + output(indent, lvalue_formatter(n), ' = ', | ||
89 | + f.str_extract(lvalue_formatter), ';\n') | ||
90 | # end General | ||
91 | |||
92 | |||
93 | @@ -XXX,XX +XXX,XX @@ def extract_name(self): | ||
94 | def output_extract(self): | ||
95 | output('static void ', self.extract_name(), '(DisasContext *ctx, ', | ||
96 | self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n') | ||
97 | - for n, f in self.fields.items(): | ||
98 | - output(' a->', n, ' = ', f.str_extract(), ';\n') | ||
99 | + self.output_fields(str_indent(4), lambda n: 'a->' + n) | ||
100 | output('}\n\n') | ||
101 | # end Format | ||
102 | |||
103 | @@ -XXX,XX +XXX,XX @@ def output_code(self, i, extracted, outerbits, outermask): | ||
104 | if not extracted: | ||
105 | output(ind, self.base.extract_name(), | ||
106 | '(ctx, &u.f_', arg, ', insn);\n') | ||
107 | - for n, f in self.fields.items(): | ||
108 | - output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n') | ||
109 | + self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n) | ||
110 | output(ind, 'if (', translate_prefix, '_', self.name, | ||
111 | '(ctx, &u.f_', arg, ')) return true;\n') | ||
112 | |||
113 | -- | ||
114 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Peter Maydell <peter.maydell@linaro.org> | ||
1 | 2 | ||
3 | To support named fields, we will need to be able to do a topological | ||
4 | sort (so that we ensure that we output the assignment to field A | ||
5 | before the assignment to field B if field B refers to field A by | ||
6 | name). The good news is that there is a tsort in the python standard | ||
7 | library; the bad news is that it was only added in Python 3.9. | ||
8 | |||
9 | To bridge the gap between our current minimum supported Python | ||
10 | version and 3.9, provide a local implementation that has the | ||
11 | same API as the stdlib version for the parts we care about. | ||
12 | In future when QEMU's minimum Python version requirement reaches | ||
13 | 3.9 we can delete this code and replace it with an 'import' line. | ||
14 | |||
15 | The core of this implementation is based on | ||
16 | https://code.activestate.com/recipes/578272-topological-sort/ | ||
17 | which is MIT-licensed. | ||
18 | |||
19 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
20 | Acked-by: Richard Henderson <richard.henderson@linaro.org> | ||
21 | Message-Id: <20230523120447.728365-5-peter.maydell@linaro.org> | ||
22 | --- | ||
23 | scripts/decodetree.py | 74 +++++++++++++++++++++++++++++++++++++++++++ | ||
24 | 1 file changed, 74 insertions(+) | ||
25 | |||
26 | diff --git a/scripts/decodetree.py b/scripts/decodetree.py | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/scripts/decodetree.py | ||
29 | +++ b/scripts/decodetree.py | ||
30 | @@ -XXX,XX +XXX,XX @@ | ||
31 | re_fmt_ident = '@[a-zA-Z0-9_]*' | ||
32 | re_pat_ident = '[a-zA-Z0-9_]*' | ||
33 | |||
34 | +# Local implementation of a topological sort. We use the same API that | ||
35 | +# the Python graphlib does, so that when QEMU moves forward to a | ||
36 | +# baseline of Python 3.9 or newer this code can all be dropped and | ||
37 | +# replaced with: | ||
38 | +# from graphlib import TopologicalSorter, CycleError | ||
39 | +# | ||
40 | +# https://docs.python.org/3.9/library/graphlib.html#graphlib.TopologicalSorter | ||
41 | +# | ||
42 | +# We only implement the parts of TopologicalSorter we care about: | ||
43 | +# ts = TopologicalSorter(graph=None) | ||
44 | +# create the sorter. graph is a dictionary whose keys are | ||
45 | +# nodes and whose values are lists of the predecessors of that node. | ||
46 | +# (That is, if graph contains "A" -> ["B", "C"] then we must output | ||
47 | +# B and C before A.) | ||
48 | +# ts.static_order() | ||
49 | +# returns a list of all the nodes in sorted order, or raises CycleError | ||
50 | +# CycleError | ||
51 | +# exception raised if there are cycles in the graph. The second | ||
52 | +# element in the args attribute is a list of nodes which form a | ||
53 | +# cycle; the first and last element are the same, eg [a, b, c, a] | ||
54 | +# (Our implementation doesn't give the order correctly.) | ||
55 | +# | ||
56 | +# For our purposes we can assume that the data set is always small | ||
57 | +# (typically 10 nodes or less, actual links in the graph very rare), | ||
58 | +# so we don't need to worry about efficiency of implementation. | ||
59 | +# | ||
60 | +# The core of this implementation is from | ||
61 | +# https://code.activestate.com/recipes/578272-topological-sort/ | ||
62 | +# (but updated to Python 3), and is under the MIT license. | ||
63 | + | ||
64 | +class CycleError(ValueError): | ||
65 | + """Subclass of ValueError raised if cycles exist in the graph""" | ||
66 | + pass | ||
67 | + | ||
68 | +class TopologicalSorter: | ||
69 | + """Topologically sort a graph""" | ||
70 | + def __init__(self, graph=None): | ||
71 | + self.graph = graph | ||
72 | + | ||
73 | + def static_order(self): | ||
74 | + # We do the sort right here, unlike the stdlib version | ||
75 | + from functools import reduce | ||
76 | + data = {} | ||
77 | + r = [] | ||
78 | + | ||
79 | + if not self.graph: | ||
80 | + return [] | ||
81 | + | ||
82 | + # This code wants the values in the dict to be specifically sets | ||
83 | + for k, v in self.graph.items(): | ||
84 | + data[k] = set(v) | ||
85 | + | ||
86 | + # Find all items that don't depend on anything. | ||
87 | + extra_items_in_deps = (reduce(set.union, data.values()) | ||
88 | + - set(data.keys())) | ||
89 | + # Add empty dependencies where needed | ||
90 | + data.update({item:{} for item in extra_items_in_deps}) | ||
91 | + while True: | ||
92 | + ordered = set(item for item, dep in data.items() if not dep) | ||
93 | + if not ordered: | ||
94 | + break | ||
95 | + r.extend(ordered) | ||
96 | + data = {item: (dep - ordered) | ||
97 | + for item, dep in data.items() | ||
98 | + if item not in ordered} | ||
99 | + if data: | ||
100 | + # This doesn't give as nice results as the stdlib, which | ||
101 | + # gives you the cycle by listing the nodes in order. Here | ||
102 | + # we only know the nodes in the cycle but not their order. | ||
103 | + raise CycleError(f'nodes are in a cycle', list(data.keys())) | ||
104 | + | ||
105 | + return r | ||
106 | +# end TopologicalSorter | ||
107 | + | ||
108 | def error_with_file(file, lineno, *args): | ||
109 | """Print an error message from file:line and args and exit.""" | ||
110 | global output_file | ||
111 | -- | ||
112 | 2.34.1 | diff view generated by jsdifflib |
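A rough usage sketch of the API described in the comment above; on Python 3.9 or newer the stdlib graphlib provides these same calls, which is exactly what the local fallback mirrors (the node names here are arbitrary examples):

    from graphlib import TopologicalSorter   # stdlib on Python >= 3.9

    # Keys are nodes, values list the predecessors that must be output first:
    # A depends on B and C, and C depends on B.
    graph = {'A': ['B', 'C'], 'C': ['B']}
    print(list(TopologicalSorter(graph).static_order()))   # ['B', 'C', 'A']

decodetree only needs static_order() and CycleError, which is why the fallback implements just those two pieces of the stdlib interface.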
1 | Introduce macro VX4() used for encoding Altivec instructions. | 1 | From: Peter Maydell <peter.maydell@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 3 | Implement support for named fields, i.e. where one field is defined |
4 | Signed-off-by: Aleksandar Markovic <amarkovic@wavecomp.com> | 4 | in terms of another, rather than directly in terms of bits extracted |
5 | from the instruction. | ||
6 | |||
7 | The new method referenced_fields() on all the Field classes returns a | ||
8 | list of fields that this field references. This just passes through, | ||
9 | except for the new NamedField class. | ||
10 | |||
11 | We can then use referenced_fields() to: | ||
12 | * construct a list of 'dangling references' for a format or | ||
13 | pattern, which is the fields that the format/pattern uses but | ||
14 | doesn't define itself | ||
15 | * do a topological sort, so that we output "field = value" | ||
16 | assignments in an order that means that we assign a field before | ||
17 | we reference it in a subsequent assignment | ||
18 | * check when we output the code for a pattern whether we need to | ||
19 | fill in the format fields before or after the pattern fields, and | ||
20 | do other error checking | ||
21 | |||
22 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
23 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
24 | Message-Id: <20230523120447.728365-6-peter.maydell@linaro.org> | ||
5 | --- | 25 | --- |
6 | tcg/ppc/tcg-target.inc.c | 1 + | 26 | scripts/decodetree.py | 145 ++++++++++++++++++++++++++++++++++++++++-- |
7 | 1 file changed, 1 insertion(+) | 27 | 1 file changed, 139 insertions(+), 6 deletions(-) |
8 | 28 | ||
9 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c | 29 | diff --git a/scripts/decodetree.py b/scripts/decodetree.py |
10 | index XXXXXXX..XXXXXXX 100644 | 30 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/tcg/ppc/tcg-target.inc.c | 31 | --- a/scripts/decodetree.py |
12 | +++ b/tcg/ppc/tcg-target.inc.c | 32 | +++ b/scripts/decodetree.py |
13 | @@ -XXX,XX +XXX,XX @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, | 33 | @@ -XXX,XX +XXX,XX @@ def str_extract(self, lvalue_formatter): |
14 | #define XO31(opc) (OPCD(31)|((opc)<<1)) | 34 | s = 's' if self.sign else '' |
15 | #define XO58(opc) (OPCD(58)|(opc)) | 35 | return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})' |
16 | #define XO62(opc) (OPCD(62)|(opc)) | 36 | |
17 | +#define VX4(opc) (OPCD(4)|(opc)) | 37 | + def referenced_fields(self): |
18 | 38 | + return [] | |
19 | #define B OPCD( 18) | 39 | + |
20 | #define BC OPCD( 16) | 40 | def __eq__(self, other): |
41 | return self.sign == other.sign and self.mask == other.mask | ||
42 | |||
43 | @@ -XXX,XX +XXX,XX @@ def str_extract(self, lvalue_formatter): | ||
44 | pos += f.len | ||
45 | return ret | ||
46 | |||
47 | + def referenced_fields(self): | ||
48 | + l = [] | ||
49 | + for f in self.subs: | ||
50 | + l.extend(f.referenced_fields()) | ||
51 | + return l | ||
52 | + | ||
53 | def __ne__(self, other): | ||
54 | if len(self.subs) != len(other.subs): | ||
55 | return True | ||
56 | @@ -XXX,XX +XXX,XX @@ def __str__(self): | ||
57 | def str_extract(self, lvalue_formatter): | ||
58 | return str(self.value) | ||
59 | |||
60 | + def referenced_fields(self): | ||
61 | + return [] | ||
62 | + | ||
63 | def __cmp__(self, other): | ||
64 | return self.value - other.value | ||
65 | # end ConstField | ||
66 | @@ -XXX,XX +XXX,XX @@ def str_extract(self, lvalue_formatter): | ||
67 | return (self.func + '(ctx, ' | ||
68 | + self.base.str_extract(lvalue_formatter) + ')') | ||
69 | |||
70 | + def referenced_fields(self): | ||
71 | + return self.base.referenced_fields() | ||
72 | + | ||
73 | def __eq__(self, other): | ||
74 | return self.func == other.func and self.base == other.base | ||
75 | |||
76 | @@ -XXX,XX +XXX,XX @@ def __str__(self): | ||
77 | def str_extract(self, lvalue_formatter): | ||
78 | return self.func + '(ctx)' | ||
79 | |||
80 | + def referenced_fields(self): | ||
81 | + return [] | ||
82 | + | ||
83 | def __eq__(self, other): | ||
84 | return self.func == other.func | ||
85 | |||
86 | @@ -XXX,XX +XXX,XX @@ def __ne__(self, other): | ||
87 | return not self.__eq__(other) | ||
88 | # end ParameterField | ||
89 | |||
90 | +class NamedField: | ||
91 | + """Class representing a field already named in the pattern""" | ||
92 | + def __init__(self, name, sign, len): | ||
93 | + self.mask = 0 | ||
94 | + self.sign = sign | ||
95 | + self.len = len | ||
96 | + self.name = name | ||
97 | + | ||
98 | + def __str__(self): | ||
99 | + return self.name | ||
100 | + | ||
101 | + def str_extract(self, lvalue_formatter): | ||
102 | + global bitop_width | ||
103 | + s = 's' if self.sign else '' | ||
104 | + lvalue = lvalue_formatter(self.name) | ||
105 | + return f'{s}extract{bitop_width}({lvalue}, 0, {self.len})' | ||
106 | + | ||
107 | + def referenced_fields(self): | ||
108 | + return [self.name] | ||
109 | + | ||
110 | + def __eq__(self, other): | ||
111 | + return self.name == other.name | ||
112 | + | ||
113 | + def __ne__(self, other): | ||
114 | + return not self.__eq__(other) | ||
115 | +# end NamedField | ||
116 | |||
117 | class Arguments: | ||
118 | """Class representing the extracted fields of a format""" | ||
119 | @@ -XXX,XX +XXX,XX @@ def output_def(self): | ||
120 | output('} ', self.struct_name(), ';\n\n') | ||
121 | # end Arguments | ||
122 | |||
123 | - | ||
124 | class General: | ||
125 | """Common code between instruction formats and instruction patterns""" | ||
126 | def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): | ||
127 | @@ -XXX,XX +XXX,XX @@ def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): | ||
128 | self.fieldmask = fldm | ||
129 | self.fields = flds | ||
130 | self.width = w | ||
131 | + self.dangling = None | ||
132 | |||
133 | def __str__(self): | ||
134 | return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask) | ||
135 | @@ -XXX,XX +XXX,XX @@ def __str__(self): | ||
136 | def str1(self, i): | ||
137 | return str_indent(i) + self.__str__() | ||
138 | |||
139 | + def dangling_references(self): | ||
140 | + # Return a list of all named references which aren't satisfied | ||
141 | + # directly by this format/pattern. This will be either: | ||
142 | + # * a format referring to a field which is specified by the | ||
143 | + # pattern(s) using it | ||
144 | + # * a pattern referring to a field which is specified by the | ||
145 | + # format it uses | ||
146 | + # * a user error (referring to a field that doesn't exist at all) | ||
147 | + if self.dangling is None: | ||
148 | + # Compute this once and cache the answer | ||
149 | + dangling = [] | ||
150 | + for n, f in self.fields.items(): | ||
151 | + for r in f.referenced_fields(): | ||
152 | + if r not in self.fields: | ||
153 | + dangling.append(r) | ||
154 | + self.dangling = dangling | ||
155 | + return self.dangling | ||
156 | + | ||
157 | def output_fields(self, indent, lvalue_formatter): | ||
158 | + # We use a topological sort to ensure that any use of NamedField | ||
159 | + # comes after the initialization of the field it is referencing. | ||
160 | + graph = {} | ||
161 | for n, f in self.fields.items(): | ||
162 | - output(indent, lvalue_formatter(n), ' = ', | ||
163 | - f.str_extract(lvalue_formatter), ';\n') | ||
164 | + refs = f.referenced_fields() | ||
165 | + graph[n] = refs | ||
166 | + | ||
167 | + try: | ||
168 | + ts = TopologicalSorter(graph) | ||
169 | + for n in ts.static_order(): | ||
170 | + # We only want to emit assignments for the keys | ||
171 | + # in our fields list, not for anything that ends up | ||
172 | + # in the tsort graph only because it was referenced as | ||
173 | + # a NamedField. | ||
174 | + try: | ||
175 | + f = self.fields[n] | ||
176 | + output(indent, lvalue_formatter(n), ' = ', | ||
177 | + f.str_extract(lvalue_formatter), ';\n') | ||
178 | + except KeyError: | ||
179 | + pass | ||
180 | + except CycleError as e: | ||
181 | + # The second element of args is a list of nodes which form | ||
182 | + # a cycle (there might be others too, but only one is reported). | ||
183 | + # Pretty-print it to tell the user. | ||
184 | + cycle = ' => '.join(e.args[1]) | ||
185 | + error(self.lineno, 'field definitions form a cycle: ' + cycle) | ||
186 | # end General | ||
187 | |||
188 | |||
189 | @@ -XXX,XX +XXX,XX @@ def output_code(self, i, extracted, outerbits, outermask): | ||
190 | ind = str_indent(i) | ||
191 | arg = self.base.base.name | ||
192 | output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n') | ||
193 | + # We might have named references in the format that refer to fields | ||
194 | + # in the pattern, or named references in the pattern that refer | ||
195 | + # to fields in the format. This affects whether we extract the fields | ||
196 | + # for the format before or after the ones for the pattern. | ||
197 | + # For simplicity we don't allow cross references in both directions. | ||
198 | + # This is also where we catch the syntax error of referring to | ||
199 | + # a nonexistent field. | ||
200 | + fmt_refs = self.base.dangling_references() | ||
201 | + for r in fmt_refs: | ||
202 | + if r not in self.fields: | ||
203 | + error(self.lineno, f'format refers to undefined field {r}') | ||
204 | + pat_refs = self.dangling_references() | ||
205 | + for r in pat_refs: | ||
206 | + if r not in self.base.fields: | ||
207 | + error(self.lineno, f'pattern refers to undefined field {r}') | ||
208 | + if pat_refs and fmt_refs: | ||
209 | + error(self.lineno, ('pattern that uses fields defined in format ' | ||
210 | + 'cannot use format that uses fields defined ' | ||
211 | + 'in pattern')) | ||
212 | + if fmt_refs: | ||
213 | + # pattern fields first | ||
214 | + self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n) | ||
215 | + assert not extracted, "dangling fmt refs but it was already extracted" | ||
216 | if not extracted: | ||
217 | output(ind, self.base.extract_name(), | ||
218 | '(ctx, &u.f_', arg, ', insn);\n') | ||
219 | - self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n) | ||
220 | + if not fmt_refs: | ||
221 | + # pattern fields last | ||
222 | + self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n) | ||
223 | + | ||
224 | output(ind, 'if (', translate_prefix, '_', self.name, | ||
225 | '(ctx, &u.f_', arg, ')) return true;\n') | ||
226 | |||
227 | @@ -XXX,XX +XXX,XX @@ def output_code(self, i, extracted, outerbits, outermask): | ||
228 | ind = str_indent(i) | ||
229 | |||
230 | # If we identified all nodes below have the same format, | ||
231 | - # extract the fields now. | ||
232 | - if not extracted and self.base: | ||
233 | + # extract the fields now. But don't do it if the format relies | ||
234 | + # on named fields from the insn pattern, as those won't have | ||
235 | + # been initialised at this point. | ||
236 | + if not extracted and self.base and not self.base.dangling_references(): | ||
237 | output(ind, self.base.extract_name(), | ||
238 | '(ctx, &u.f_', self.base.base.name, ', insn);\n') | ||
239 | extracted = True | ||
240 | @@ -XXX,XX +XXX,XX @@ def parse_field(lineno, name, toks): | ||
241 | """Parse one instruction field from TOKS at LINENO""" | ||
242 | global fields | ||
243 | global insnwidth | ||
244 | + global re_C_ident | ||
245 | |||
246 | # A "simple" field will have only one entry; | ||
247 | # a "multifield" will have several. | ||
248 | @@ -XXX,XX +XXX,XX @@ def parse_field(lineno, name, toks): | ||
249 | func = func[1] | ||
250 | continue | ||
251 | |||
252 | + if re.fullmatch(re_C_ident + ':s[0-9]+', t): | ||
253 | + # Signed named field | ||
254 | + subtoks = t.split(':') | ||
255 | + n = subtoks[0] | ||
256 | + le = int(subtoks[1]) | ||
257 | + f = NamedField(n, True, le) | ||
258 | + subs.append(f) | ||
259 | + width += le | ||
260 | + continue | ||
261 | + if re.fullmatch(re_C_ident + ':[0-9]+', t): | ||
262 | + # Unsigned named field | ||
263 | + subtoks = t.split(':') | ||
264 | + n = subtoks[0] | ||
265 | + le = int(subtoks[1]) | ||
266 | + f = NamedField(n, False, le) | ||
267 | + subs.append(f) | ||
268 | + width += le | ||
269 | + continue | ||
270 | + | ||
271 | if re.fullmatch('[0-9]+:s[0-9]+', t): | ||
272 | # Signed field extract | ||
273 | subtoks = t.split(':s') | ||
21 | -- | 274 | -- |
22 | 2.17.1 | 275 | 2.34.1 |
23 | |||
24 | diff view generated by jsdifflib |
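A sketch (with hypothetical field names) of how referenced_fields() feeds the topological sort: for a multifield such as "%imm_sz 8:8 sz:3", the named reference makes 'sz' a predecessor of 'imm_sz', so the generated extractor assigns a->sz before it evaluates extract32(a->sz, 0, 3) as part of a->imm_sz:

    from graphlib import TopologicalSorter   # or the local fallback shown earlier

    # node -> fields it references by name (what referenced_fields() reports)
    graph = {
        'sz':     [],       # plain field, taken straight from insn bits
        'imm_sz': ['sz'],   # %imm_sz 8:8 sz:3 references the named field sz
    }
    print(list(TopologicalSorter(graph).static_order()))   # ['sz', 'imm_sz']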
New patch | |||
---|---|---|---|
1 | From: Peter Maydell <peter.maydell@linaro.org> | ||
1 | 2 | ||
3 | Add some tests for various cases of named-field use, both ones that | ||
4 | should work and ones that should be diagnosed as errors. | ||
5 | |||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-Id: <20230523120447.728365-7-peter.maydell@linaro.org> | ||
9 | --- | ||
10 | tests/decode/err_field10.decode | 7 +++++++ | ||
11 | tests/decode/err_field7.decode | 7 +++++++ | ||
12 | tests/decode/err_field8.decode | 8 ++++++++ | ||
13 | tests/decode/err_field9.decode | 14 ++++++++++++++ | ||
14 | tests/decode/succ_named_field.decode | 19 +++++++++++++++++++ | ||
15 | tests/decode/meson.build | 5 +++++ | ||
16 | 6 files changed, 60 insertions(+) | ||
17 | create mode 100644 tests/decode/err_field10.decode | ||
18 | create mode 100644 tests/decode/err_field7.decode | ||
19 | create mode 100644 tests/decode/err_field8.decode | ||
20 | create mode 100644 tests/decode/err_field9.decode | ||
21 | create mode 100644 tests/decode/succ_named_field.decode | ||
22 | |||
23 | diff --git a/tests/decode/err_field10.decode b/tests/decode/err_field10.decode | ||
24 | new file mode 100644 | ||
25 | index XXXXXXX..XXXXXXX | ||
26 | --- /dev/null | ||
27 | +++ b/tests/decode/err_field10.decode | ||
28 | @@ -XXX,XX +XXX,XX @@ | ||
29 | +# This work is licensed under the terms of the GNU LGPL, version 2 or later. | ||
30 | +# See the COPYING.LIB file in the top-level directory. | ||
31 | + | ||
32 | +# Diagnose formats which refer to undefined fields | ||
33 | +%field1 field2:3 | ||
34 | +@fmt ........ ........ ........ ........ %field1 | ||
35 | +insn 00000000 00000000 00000000 00000000 @fmt | ||
36 | diff --git a/tests/decode/err_field7.decode b/tests/decode/err_field7.decode | ||
37 | new file mode 100644 | ||
38 | index XXXXXXX..XXXXXXX | ||
39 | --- /dev/null | ||
40 | +++ b/tests/decode/err_field7.decode | ||
41 | @@ -XXX,XX +XXX,XX @@ | ||
42 | +# This work is licensed under the terms of the GNU LGPL, version 2 or later. | ||
43 | +# See the COPYING.LIB file in the top-level directory. | ||
44 | + | ||
45 | +# Diagnose fields whose definitions form a loop | ||
46 | +%field1 field2:3 | ||
47 | +%field2 field1:4 | ||
48 | +insn 00000000 00000000 00000000 00000000 %field1 %field2 | ||
49 | diff --git a/tests/decode/err_field8.decode b/tests/decode/err_field8.decode | ||
50 | new file mode 100644 | ||
51 | index XXXXXXX..XXXXXXX | ||
52 | --- /dev/null | ||
53 | +++ b/tests/decode/err_field8.decode | ||
54 | @@ -XXX,XX +XXX,XX @@ | ||
55 | +# This work is licensed under the terms of the GNU LGPL, version 2 or later. | ||
56 | +# See the COPYING.LIB file in the top-level directory. | ||
57 | + | ||
58 | +# Diagnose patterns which refer to undefined fields | ||
59 | +&f1 f1 a | ||
60 | +%field1 field2:3 | ||
61 | +@fmt ........ ........ ........ .... a:4 &f1 | ||
62 | +insn 00000000 00000000 00000000 0000 .... @fmt f1=%field1 | ||
63 | diff --git a/tests/decode/err_field9.decode b/tests/decode/err_field9.decode | ||
64 | new file mode 100644 | ||
65 | index XXXXXXX..XXXXXXX | ||
66 | --- /dev/null | ||
67 | +++ b/tests/decode/err_field9.decode | ||
68 | @@ -XXX,XX +XXX,XX @@ | ||
69 | +# This work is licensed under the terms of the GNU LGPL, version 2 or later. | ||
70 | +# See the COPYING.LIB file in the top-level directory. | ||
71 | + | ||
72 | +# Diagnose fields where the format refers to a field defined in the | ||
73 | +# pattern and the pattern refers to a field defined in the format. | ||
74 | +# This is theoretically not impossible to implement, but is not | ||
75 | +# supported by the script at this time. | ||
76 | +&abcd a b c d | ||
77 | +%refa a:3 | ||
78 | +%refc c:4 | ||
79 | +# Format defines 'c' and sets 'b' to an indirect ref to 'a' | ||
80 | +@fmt ........ ........ ........ c:8 &abcd b=%refa | ||
81 | +# Pattern defines 'a' and sets 'd' to an indirect ref to 'c' | ||
82 | +insn 00000000 00000000 00000000 ........ @fmt d=%refc a=6 | ||
83 | diff --git a/tests/decode/succ_named_field.decode b/tests/decode/succ_named_field.decode | ||
84 | new file mode 100644 | ||
85 | index XXXXXXX..XXXXXXX | ||
86 | --- /dev/null | ||
87 | +++ b/tests/decode/succ_named_field.decode | ||
88 | @@ -XXX,XX +XXX,XX @@ | ||
89 | +# This work is licensed under the terms of the GNU LGPL, version 2 or later. | ||
90 | +# See the COPYING.LIB file in the top-level directory. | ||
91 | + | ||
92 | +# field using a named_field | ||
93 | +%imm_sz 8:8 sz:3 | ||
94 | +insn 00000000 00000000 ........ 00000000 imm_sz=%imm_sz sz=1 | ||
95 | + | ||
96 | +# Ditto, via a format. Here a field in the format | ||
97 | +# references a named field defined in the insn pattern: | ||
98 | +&imm_a imm alpha | ||
99 | +%foo 0:16 alpha:4 | ||
100 | +@foo 00000001 ........ ........ ........ &imm_a imm=%foo | ||
101 | +i1 ........ 00000000 ........ ........ @foo alpha=1 | ||
102 | +i2 ........ 00000001 ........ ........ @foo alpha=2 | ||
103 | + | ||
104 | +# Here the named field is defined in the format and referenced | ||
105 | +# from the insn pattern: | ||
106 | +@bar 00000010 ........ ........ ........ &imm_a alpha=4 | ||
107 | +i3 ........ 00000000 ........ ........ @bar imm=%foo | ||
108 | diff --git a/tests/decode/meson.build b/tests/decode/meson.build | ||
109 | index XXXXXXX..XXXXXXX 100644 | ||
110 | --- a/tests/decode/meson.build | ||
111 | +++ b/tests/decode/meson.build | ||
112 | @@ -XXX,XX +XXX,XX @@ err_tests = [ | ||
113 | 'err_field4.decode', | ||
114 | 'err_field5.decode', | ||
115 | 'err_field6.decode', | ||
116 | + 'err_field7.decode', | ||
117 | + 'err_field8.decode', | ||
118 | + 'err_field9.decode', | ||
119 | + 'err_field10.decode', | ||
120 | 'err_init1.decode', | ||
121 | 'err_init2.decode', | ||
122 | 'err_init3.decode', | ||
123 | @@ -XXX,XX +XXX,XX @@ succ_tests = [ | ||
124 | 'succ_argset_type1.decode', | ||
125 | 'succ_function.decode', | ||
126 | 'succ_ident1.decode', | ||
127 | + 'succ_named_field.decode', | ||
128 | 'succ_pattern_group_nest1.decode', | ||
129 | 'succ_pattern_group_nest2.decode', | ||
130 | 'succ_pattern_group_nest3.decode', | ||
131 | -- | ||
132 | 2.34.1 | diff view generated by jsdifflib |
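For the err_field7 case above, where %field1 and %field2 reference each other, a hedged sketch of what the sorter sees and the kind of diagnostic output_fields() turns it into (the exact node order in the message can vary, especially with the local fallback, which does not order the cycle):

    from graphlib import TopologicalSorter, CycleError

    graph = {'field1': ['field2'], 'field2': ['field1']}
    try:
        list(TopologicalSorter(graph).static_order())
    except CycleError as e:
        # decodetree reports something like:
        #   error: field definitions form a cycle: field1 => field2 => field1
        print(' => '.join(str(n) for n in e.args[1]))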