1 | The following changes since commit 75d30fde55485b965a1168a21d016dd07b50ed32: | 1 | The following changes since commit 7fe6cb68117ac856e03c93d18aca09de015392b0: |
---|---|---|---|
2 | 2 | ||
3 | Merge tag 'block-pull-request' of https://gitlab.com/stefanha/qemu into staging (2022-10-30 15:07:25 -0400) | 3 | Merge tag 'pull-target-arm-20230530-1' of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2023-05-30 08:02:05 -0700) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20221031 | 7 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230530 |
8 | 8 | ||
9 | for you to fetch changes up to cb375590983fc3d23600d02ba05a05d34fe44150: | 9 | for you to fetch changes up to 276d77de503e8f5f5cbd3f7d94302ca12d1d982e: |
10 | 10 | ||
11 | target/i386: Expand eflags updates inline (2022-10-31 11:39:10 +1100) | 11 | tests/decode: Add tests for various named-field cases (2023-05-30 10:55:39 -0700) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Remove sparc32plus support from tcg/sparc. | 14 | Improvements to 128-bit atomics: |
15 | target/i386: Use cpu_unwind_state_data for tpr access. | 15 | - Separate __int128_t type and arithmetic detection |
16 | target/i386: Expand eflags updates inline | 16 | - Support 128-bit load/store in backend for i386, aarch64, ppc64, s390x |
17 | - Accelerate atomics via host/include/ | ||
18 | Decodetree: | ||
19 | - Add named field syntax | ||
20 | - Move tests to meson | ||
17 | 21 | ||
18 | ---------------------------------------------------------------- | 22 | ---------------------------------------------------------------- |
19 | Icenowy Zheng (1): | 23 | Peter Maydell (5): |
20 | tcg/tci: fix logic error when registering helpers via FFI | 24 | docs: Document decodetree named field syntax |
25 | scripts/decodetree: Pass lvalue-formatter function to str_extract() | ||
26 | scripts/decodetree: Implement a topological sort | ||
27 | scripts/decodetree: Implement named field support | ||
28 | tests/decode: Add tests for various named-field cases | ||
21 | 29 | ||
22 | Richard Henderson (10): | 30 | Richard Henderson (22): |
23 | tcg/sparc: Remove support for sparc32plus | 31 | tcg: Fix register move type in tcg_out_ld_helper_ret |
24 | tcg/sparc64: Rename from tcg/sparc | 32 | accel/tcg: Fix check for page writeability in load_atomic16_or_exit |
25 | tcg/sparc64: Remove sparc32plus constraints | 33 | meson: Split test for __int128_t type from __int128_t arithmetic |
26 | accel/tcg: Introduce cpu_unwind_state_data | 34 | qemu/atomic128: Add x86_64 atomic128-ldst.h |
27 | target/i386: Use cpu_unwind_state_data for tpr access | 35 | tcg/i386: Support 128-bit load/store |
28 | target/openrisc: Always exit after mtspr npc | 36 | tcg/aarch64: Rename temporaries |
29 | target/openrisc: Use cpu_unwind_state_data for mfspr | 37 | tcg/aarch64: Reserve TCG_REG_TMP1, TCG_REG_TMP2 |
30 | accel/tcg: Remove will_exit argument from cpu_restore_state | 38 | tcg/aarch64: Simplify constraints on qemu_ld/st |
31 | accel/tcg: Remove reset_icount argument from cpu_restore_state_from_tb | 39 | tcg/aarch64: Support 128-bit load/store |
32 | target/i386: Expand eflags updates inline | 40 | tcg/ppc: Support 128-bit load/store |
41 | tcg/s390x: Support 128-bit load/store | ||
42 | accel/tcg: Extract load_atom_extract_al16_or_al8 to host header | ||
43 | accel/tcg: Extract store_atom_insert_al16 to host header | ||
44 | accel/tcg: Add x86_64 load_atom_extract_al16_or_al8 | ||
45 | accel/tcg: Add aarch64 lse2 load_atom_extract_al16_or_al8 | ||
46 | accel/tcg: Add aarch64 store_atom_insert_al16 | ||
47 | tcg: Remove TCG_TARGET_TLB_DISPLACEMENT_BITS | ||
48 | decodetree: Add --test-for-error | ||
49 | decodetree: Fix recursion in prop_format and build_tree | ||
50 | decodetree: Diagnose empty pattern group | ||
51 | decodetree: Do not remove output_file from /dev | ||
52 | tests/decode: Convert tests to meson | ||
33 | 53 | ||
34 | meson.build | 4 +- | 54 | docs/devel/decodetree.rst | 33 ++- |
35 | accel/tcg/internal.h | 4 +- | 55 | meson.build | 15 +- |
36 | include/exec/exec-all.h | 24 ++- | 56 | host/include/aarch64/host/load-extract-al16-al8.h | 40 ++++ |
37 | target/i386/helper.h | 5 - | 57 | host/include/aarch64/host/store-insert-al16.h | 47 ++++ |
38 | tcg/{sparc => sparc64}/tcg-target-con-set.h | 16 +- | 58 | host/include/generic/host/load-extract-al16-al8.h | 45 ++++ |
39 | tcg/{sparc => sparc64}/tcg-target-con-str.h | 3 - | 59 | host/include/generic/host/store-insert-al16.h | 50 ++++ |
40 | tcg/{sparc => sparc64}/tcg-target.h | 11 -- | 60 | host/include/x86_64/host/atomic128-ldst.h | 68 ++++++ |
41 | accel/tcg/cpu-exec-common.c | 2 +- | 61 | host/include/x86_64/host/load-extract-al16-al8.h | 50 ++++ |
42 | accel/tcg/tb-maint.c | 4 +- | 62 | include/qemu/int128.h | 4 +- |
43 | accel/tcg/translate-all.c | 91 +++++---- | 63 | tcg/aarch64/tcg-target-con-set.h | 4 +- |
44 | target/alpha/helper.c | 2 +- | 64 | tcg/aarch64/tcg-target-con-str.h | 1 - |
45 | target/alpha/mem_helper.c | 2 +- | 65 | tcg/aarch64/tcg-target.h | 12 +- |
46 | target/arm/op_helper.c | 2 +- | 66 | tcg/arm/tcg-target.h | 1 - |
47 | target/arm/tlb_helper.c | 8 +- | 67 | tcg/i386/tcg-target.h | 5 +- |
48 | target/cris/helper.c | 2 +- | 68 | tcg/mips/tcg-target.h | 1 - |
49 | target/i386/helper.c | 21 ++- | 69 | tcg/ppc/tcg-target-con-set.h | 2 + |
50 | target/i386/tcg/cc_helper.c | 41 ----- | 70 | tcg/ppc/tcg-target-con-str.h | 1 + |
51 | target/i386/tcg/sysemu/svm_helper.c | 2 +- | 71 | tcg/ppc/tcg-target.h | 4 +- |
52 | target/i386/tcg/translate.c | 30 ++- | 72 | tcg/riscv/tcg-target.h | 1 - |
53 | target/m68k/op_helper.c | 4 +- | 73 | tcg/s390x/tcg-target-con-set.h | 2 + |
54 | target/microblaze/helper.c | 2 +- | 74 | tcg/s390x/tcg-target.h | 3 +- |
55 | target/nios2/op_helper.c | 2 +- | 75 | tcg/sparc64/tcg-target.h | 1 - |
56 | target/openrisc/sys_helper.c | 17 +- | 76 | tcg/tci/tcg-target.h | 1 - |
57 | target/ppc/excp_helper.c | 2 +- | 77 | tests/decode/err_field10.decode | 7 + |
58 | target/s390x/tcg/excp_helper.c | 2 +- | 78 | tests/decode/err_field7.decode | 7 + |
59 | target/tricore/op_helper.c | 2 +- | 79 | tests/decode/err_field8.decode | 8 + |
60 | target/xtensa/helper.c | 6 +- | 80 | tests/decode/err_field9.decode | 14 ++ |
61 | tcg/tcg.c | 81 +------- | 81 | tests/decode/succ_named_field.decode | 19 ++ |
62 | tcg/{sparc => sparc64}/tcg-target.c.inc | 275 ++++++++-------------------- | 82 | tcg/tcg.c | 4 +- |
63 | MAINTAINERS | 2 +- | 83 | accel/tcg/ldst_atomicity.c.inc | 80 +------ |
64 | 30 files changed, 232 insertions(+), 437 deletions(-) | 84 | tcg/aarch64/tcg-target.c.inc | 243 +++++++++++++++----- |
65 | rename tcg/{sparc => sparc64}/tcg-target-con-set.h (69%) | 85 | tcg/i386/tcg-target.c.inc | 191 +++++++++++++++- |
66 | rename tcg/{sparc => sparc64}/tcg-target-con-str.h (77%) | 86 | tcg/ppc/tcg-target.c.inc | 108 ++++++++- |
67 | rename tcg/{sparc => sparc64}/tcg-target.h (95%) | 87 | tcg/s390x/tcg-target.c.inc | 107 ++++++++- |
68 | rename tcg/{sparc => sparc64}/tcg-target.c.inc (91%) | 88 | scripts/decodetree.py | 265 ++++++++++++++++++++-- |
89 | tests/decode/check.sh | 24 -- | ||
90 | tests/decode/meson.build | 64 ++++++ | ||
91 | tests/meson.build | 5 +- | ||
92 | 38 files changed, 1312 insertions(+), 225 deletions(-) | ||
93 | create mode 100644 host/include/aarch64/host/load-extract-al16-al8.h | ||
94 | create mode 100644 host/include/aarch64/host/store-insert-al16.h | ||
95 | create mode 100644 host/include/generic/host/load-extract-al16-al8.h | ||
96 | create mode 100644 host/include/generic/host/store-insert-al16.h | ||
97 | create mode 100644 host/include/x86_64/host/atomic128-ldst.h | ||
98 | create mode 100644 host/include/x86_64/host/load-extract-al16-al8.h | ||
99 | create mode 100644 tests/decode/err_field10.decode | ||
100 | create mode 100644 tests/decode/err_field7.decode | ||
101 | create mode 100644 tests/decode/err_field8.decode | ||
102 | create mode 100644 tests/decode/err_field9.decode | ||
103 | create mode 100644 tests/decode/succ_named_field.decode | ||
104 | delete mode 100755 tests/decode/check.sh | ||
105 | create mode 100644 tests/decode/meson.build | diff view generated by jsdifflib |
1 | From: Icenowy Zheng <uwu@icenowy.me> | 1 | The first move was incorrectly using TCG_TYPE_I32 while the second |
---|---|---|---|
2 | move was correctly using TCG_TYPE_REG. This prevents a 64-bit host | ||
3 | from moving all 128 bits of the return value. | ||
2 | 4 | ||
3 | When registering helpers via FFI for TCI, the inner loop that iterates | 5 | Fixes: ebebea53ef8 ("tcg: Support TCG_TYPE_I128 in tcg_out_{ld,st}_helper_{args,ret}") |
4 | parameters of the helper reuses (and thus pollutes) the same variable | ||
5 | used by the outer loop that iterates all helpers, thus leaving some helpers | ||
6 | unregistered. | ||
7 | |||
8 | Fix this logic error by using a dedicated temporary variable for the | ||
9 | inner loop. | ||
10 | |||
11 | Fixes: 22f15579fa ("tcg: Build ffi data structures for helpers") | ||
12 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
13 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
14 | Signed-off-by: Icenowy Zheng <uwu@icenowy.me> | ||
15 | Message-Id: <20221028072145.1593205-1-uwu@icenowy.me> | ||
16 | [rth: Move declaration of j to the for loop itself] | ||
17 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
18 | --- | 8 | --- |
19 | tcg/tcg.c | 6 +++--- | 9 | tcg/tcg.c | 4 ++-- |
20 | 1 file changed, 3 insertions(+), 3 deletions(-) | 10 | 1 file changed, 2 insertions(+), 2 deletions(-) |
21 | 11 | ||
22 | diff --git a/tcg/tcg.c b/tcg/tcg.c | 12 | diff --git a/tcg/tcg.c b/tcg/tcg.c |
23 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
24 | --- a/tcg/tcg.c | 14 | --- a/tcg/tcg.c |
25 | +++ b/tcg/tcg.c | 15 | +++ b/tcg/tcg.c |
26 | @@ -XXX,XX +XXX,XX @@ static void tcg_context_init(unsigned max_cpus) | 16 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst, |
27 | 17 | mov[0].dst = ldst->datalo_reg; | |
28 | if (nargs != 0) { | 18 | mov[0].src = |
29 | ca->cif.arg_types = ca->args; | 19 | tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN); |
30 | - for (i = 0; i < nargs; ++i) { | 20 | - mov[0].dst_type = TCG_TYPE_I32; |
31 | - int typecode = extract32(typemask, (i + 1) * 3, 3); | 21 | - mov[0].src_type = TCG_TYPE_I32; |
32 | - ca->args[i] = typecode_to_ffi[typecode]; | 22 | + mov[0].dst_type = TCG_TYPE_REG; |
33 | + for (int j = 0; j < nargs; ++j) { | 23 | + mov[0].src_type = TCG_TYPE_REG; |
34 | + int typecode = extract32(typemask, (j + 1) * 3, 3); | 24 | mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64; |
35 | + ca->args[j] = typecode_to_ffi[typecode]; | 25 | |
36 | } | 26 | mov[1].dst = ldst->datahi_reg; |
37 | } | ||
38 | |||
39 | -- | 27 | -- |
40 | 2.34.1 | 28 | 2.34.1 |
41 | |||
42 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | PAGE_WRITE is current writability, as modified by TB protection; | ||
2 | PAGE_WRITE_ORG is the original page writability. | ||
1 | 3 | ||
4 | Fixes: cdfac37be0d ("accel/tcg: Honor atomicity of loads") | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | accel/tcg/ldst_atomicity.c.inc | 4 ++-- | ||
9 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
10 | |||
11 | diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/accel/tcg/ldst_atomicity.c.inc | ||
14 | +++ b/accel/tcg/ldst_atomicity.c.inc | ||
15 | @@ -XXX,XX +XXX,XX @@ static uint64_t load_atomic8_or_exit(CPUArchState *env, uintptr_t ra, void *pv) | ||
16 | * another process, because the fallback start_exclusive solution | ||
17 | * provides no protection across processes. | ||
18 | */ | ||
19 | - if (!page_check_range(h2g(pv), 8, PAGE_WRITE)) { | ||
20 | + if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) { | ||
21 | uint64_t *p = __builtin_assume_aligned(pv, 8); | ||
22 | return *p; | ||
23 | } | ||
24 | @@ -XXX,XX +XXX,XX @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv) | ||
25 | * another process, because the fallback start_exclusive solution | ||
26 | * provides no protection across processes. | ||
27 | */ | ||
28 | - if (!page_check_range(h2g(p), 16, PAGE_WRITE)) { | ||
29 | + if (!page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) { | ||
30 | return *p; | ||
31 | } | ||
32 | #endif | ||
33 | -- | ||
34 | 2.34.1 | diff view generated by jsdifflib |
1 | Emphasize that we only support full 64-bit code generation. | 1 | Older versions of clang have missing runtime functions for arithmetic |
---|---|---|---|
2 | with -fsanitize=undefined (see 464e3671f9d5c), so we cannot use | ||
3 | __int128_t for implementing Int128. But __int128_t is present, | ||
4 | data movement works, and it can be used for atomic128. | ||
5 | |||
6 | Probe for both CONFIG_INT128_TYPE and CONFIG_INT128, adjust | ||
7 | qemu/int128.h to define Int128Alias if CONFIG_INT128_TYPE, | ||
8 | and adjust the meson probe for atomics to use has_int128_type. | ||
2 | 9 | ||
3 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | --- | 12 | --- |
7 | meson.build | 4 +--- | 13 | meson.build | 15 ++++++++++----- |
8 | tcg/{sparc => sparc64}/tcg-target-con-set.h | 0 | 14 | include/qemu/int128.h | 4 ++-- |
9 | tcg/{sparc => sparc64}/tcg-target-con-str.h | 0 | 15 | 2 files changed, 12 insertions(+), 7 deletions(-) |
10 | tcg/{sparc => sparc64}/tcg-target.h | 0 | ||
11 | tcg/{sparc => sparc64}/tcg-target.c.inc | 0 | ||
12 | MAINTAINERS | 2 +- | ||
13 | 6 files changed, 2 insertions(+), 4 deletions(-) | ||
14 | rename tcg/{sparc => sparc64}/tcg-target-con-set.h (100%) | ||
15 | rename tcg/{sparc => sparc64}/tcg-target-con-str.h (100%) | ||
16 | rename tcg/{sparc => sparc64}/tcg-target.h (100%) | ||
17 | rename tcg/{sparc => sparc64}/tcg-target.c.inc (100%) | ||
18 | 16 | ||
19 | diff --git a/meson.build b/meson.build | 17 | diff --git a/meson.build b/meson.build |
20 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/meson.build | 19 | --- a/meson.build |
22 | +++ b/meson.build | 20 | +++ b/meson.build |
23 | @@ -XXX,XX +XXX,XX @@ qapi_trace_events = [] | 21 | @@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_ATOMIC64', cc.links(''' |
24 | bsd_oses = ['gnu/kfreebsd', 'freebsd', 'netbsd', 'openbsd', 'dragonfly', 'darwin'] | 22 | return 0; |
25 | supported_oses = ['windows', 'freebsd', 'netbsd', 'openbsd', 'darwin', 'sunos', 'linux'] | 23 | }''')) |
26 | supported_cpus = ['ppc', 'ppc64', 's390x', 'riscv', 'x86', 'x86_64', | 24 | |
27 | - 'arm', 'aarch64', 'loongarch64', 'mips', 'mips64', 'sparc', 'sparc64'] | 25 | -has_int128 = cc.links(''' |
28 | + 'arm', 'aarch64', 'loongarch64', 'mips', 'mips64', 'sparc64'] | 26 | +has_int128_type = cc.compiles(''' |
29 | 27 | + __int128_t a; | |
30 | cpu = host_machine.cpu_family() | 28 | + __uint128_t b; |
31 | 29 | + int main(void) { b = a; }''') | |
32 | @@ -XXX,XX +XXX,XX @@ if get_option('tcg').allowed() | 30 | +config_host_data.set('CONFIG_INT128_TYPE', has_int128_type) |
33 | endif | 31 | + |
34 | if get_option('tcg_interpreter') | 32 | +has_int128 = has_int128_type and cc.links(''' |
35 | tcg_arch = 'tci' | 33 | __int128_t a; |
36 | - elif host_arch == 'sparc64' | 34 | __uint128_t b; |
37 | - tcg_arch = 'sparc' | 35 | int main (void) { |
38 | elif host_arch == 'x86_64' | 36 | @@ -XXX,XX +XXX,XX @@ has_int128 = cc.links(''' |
39 | tcg_arch = 'i386' | 37 | a = a * a; |
40 | elif host_arch == 'ppc64' | 38 | return 0; |
41 | diff --git a/tcg/sparc/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h | 39 | }''') |
42 | similarity index 100% | 40 | - |
43 | rename from tcg/sparc/tcg-target-con-set.h | 41 | config_host_data.set('CONFIG_INT128', has_int128) |
44 | rename to tcg/sparc64/tcg-target-con-set.h | 42 | |
45 | diff --git a/tcg/sparc/tcg-target-con-str.h b/tcg/sparc64/tcg-target-con-str.h | 43 | -if has_int128 |
46 | similarity index 100% | 44 | +if has_int128_type |
47 | rename from tcg/sparc/tcg-target-con-str.h | 45 | # "do we have 128-bit atomics which are handled inline and specifically not |
48 | rename to tcg/sparc64/tcg-target-con-str.h | 46 | # via libatomic". The reason we can't use libatomic is documented in the |
49 | diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc64/tcg-target.h | 47 | # comment starting "GCC is a house divided" in include/qemu/atomic128.h. |
50 | similarity index 100% | 48 | @@ -XXX,XX +XXX,XX @@ if has_int128 |
51 | rename from tcg/sparc/tcg-target.h | 49 | # __alignof(unsigned __int128) for the host. |
52 | rename to tcg/sparc64/tcg-target.h | 50 | atomic_test_128 = ''' |
53 | diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc | 51 | int main(int ac, char **av) { |
54 | similarity index 100% | 52 | - unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], 16); |
55 | rename from tcg/sparc/tcg-target.c.inc | 53 | + __uint128_t *p = __builtin_assume_aligned(av[ac - 1], 16); |
56 | rename to tcg/sparc64/tcg-target.c.inc | 54 | p[1] = __atomic_load_n(&p[0], __ATOMIC_RELAXED); |
57 | diff --git a/MAINTAINERS b/MAINTAINERS | 55 | __atomic_store_n(&p[2], p[3], __ATOMIC_RELAXED); |
56 | __atomic_compare_exchange_n(&p[4], &p[5], p[6], 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); | ||
57 | @@ -XXX,XX +XXX,XX @@ if has_int128 | ||
58 | config_host_data.set('CONFIG_CMPXCHG128', cc.links(''' | ||
59 | int main(void) | ||
60 | { | ||
61 | - unsigned __int128 x = 0, y = 0; | ||
62 | + __uint128_t x = 0, y = 0; | ||
63 | __sync_val_compare_and_swap_16(&x, y, x); | ||
64 | return 0; | ||
65 | } | ||
66 | diff --git a/include/qemu/int128.h b/include/qemu/int128.h | ||
58 | index XXXXXXX..XXXXXXX 100644 | 67 | index XXXXXXX..XXXXXXX 100644 |
59 | --- a/MAINTAINERS | 68 | --- a/include/qemu/int128.h |
60 | +++ b/MAINTAINERS | 69 | +++ b/include/qemu/int128.h |
61 | @@ -XXX,XX +XXX,XX @@ L: qemu-s390x@nongnu.org | 70 | @@ -XXX,XX +XXX,XX @@ static inline void bswap128s(Int128 *s) |
62 | 71 | * a possible structure and the native types. Ease parameter passing | |
63 | SPARC TCG target | 72 | * via use of the transparent union extension. |
64 | S: Odd Fixes | 73 | */ |
65 | -F: tcg/sparc/ | 74 | -#ifdef CONFIG_INT128 |
66 | +F: tcg/sparc64/ | 75 | +#ifdef CONFIG_INT128_TYPE |
67 | F: disas/sparc.c | 76 | typedef union { |
68 | 77 | __uint128_t u; | |
69 | TCI TCG target | 78 | __int128_t i; |
79 | @@ -XXX,XX +XXX,XX @@ typedef union { | ||
80 | } Int128Alias __attribute__((transparent_union)); | ||
81 | #else | ||
82 | typedef Int128 Int128Alias; | ||
83 | -#endif /* CONFIG_INT128 */ | ||
84 | +#endif /* CONFIG_INT128_TYPE */ | ||
85 | |||
86 | #endif /* INT128_H */ | ||
70 | -- | 87 | -- |
71 | 2.34.1 | 88 | 2.34.1 |
72 | |||
73 | diff view generated by jsdifflib |
1 | Since 9b9c37c36439, we have only supported sparc64 cpus. | 1 | With CPUINFO_ATOMIC_VMOVDQA, we can perform proper atomic |
---|---|---|---|
2 | Debian and Gentoo now only support 64-bit sparc64 userland, | 2 | load/store without cmpxchg16b. |
3 | so it is time to drop the 32-bit sparc64 userland: sparc32plus. | ||
4 | 3 | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 4 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> |
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | --- | 6 | --- |
9 | tcg/sparc/tcg-target.h | 11 --- | 7 | host/include/x86_64/host/atomic128-ldst.h | 68 +++++++++++++++++++++++ |
10 | tcg/tcg.c | 75 +---------------- | 8 | 1 file changed, 68 insertions(+) |
11 | tcg/sparc/tcg-target.c.inc | 166 +++++++------------------------------ | 9 | create mode 100644 host/include/x86_64/host/atomic128-ldst.h |
12 | 3 files changed, 33 insertions(+), 219 deletions(-) | ||
13 | 10 | ||
14 | diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h | 11 | diff --git a/host/include/x86_64/host/atomic128-ldst.h b/host/include/x86_64/host/atomic128-ldst.h |
15 | index XXXXXXX..XXXXXXX 100644 | 12 | new file mode 100644 |
16 | --- a/tcg/sparc/tcg-target.h | 13 | index XXXXXXX..XXXXXXX |
17 | +++ b/tcg/sparc/tcg-target.h | 14 | --- /dev/null |
15 | +++ b/host/include/x86_64/host/atomic128-ldst.h | ||
18 | @@ -XXX,XX +XXX,XX @@ | 16 | @@ -XXX,XX +XXX,XX @@ |
19 | #ifndef SPARC_TCG_TARGET_H | 17 | +/* |
20 | #define SPARC_TCG_TARGET_H | 18 | + * SPDX-License-Identifier: GPL-2.0-or-later |
21 | 19 | + * Load/store for 128-bit atomic operations, x86_64 version. | |
22 | -#define TCG_TARGET_REG_BITS 64 | 20 | + * |
23 | - | 21 | + * Copyright (C) 2023 Linaro, Ltd. |
24 | #define TCG_TARGET_INSN_UNIT_SIZE 4 | 22 | + * |
25 | #define TCG_TARGET_TLB_DISPLACEMENT_BITS 32 | 23 | + * See docs/devel/atomics.rst for discussion about the guarantees each |
26 | #define TCG_TARGET_NB_REGS 32 | 24 | + * atomic primitive is meant to provide. |
27 | @@ -XXX,XX +XXX,XX @@ typedef enum { | 25 | + */ |
28 | /* used for function call generation */ | 26 | + |
29 | #define TCG_REG_CALL_STACK TCG_REG_O6 | 27 | +#ifndef AARCH64_ATOMIC128_LDST_H |
30 | 28 | +#define AARCH64_ATOMIC128_LDST_H | |
31 | -#ifdef __arch64__ | 29 | + |
32 | #define TCG_TARGET_STACK_BIAS 2047 | 30 | +#ifdef CONFIG_INT128_TYPE |
33 | #define TCG_TARGET_STACK_ALIGN 16 | 31 | +#include "host/cpuinfo.h" |
34 | #define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS) | 32 | +#include "tcg/debug-assert.h" |
35 | -#else | 33 | + |
36 | -#define TCG_TARGET_STACK_BIAS 0 | 34 | +/* |
37 | -#define TCG_TARGET_STACK_ALIGN 8 | 35 | + * Through clang 16, with -mcx16, __atomic_load_n is incorrectly |
38 | -#define TCG_TARGET_CALL_STACK_OFFSET (64 + 4 + 6*4) | 36 | + * expanded to a read-write operation: lock cmpxchg16b. |
39 | -#endif | 37 | + */ |
40 | - | 38 | + |
41 | -#ifdef __arch64__ | 39 | +#define HAVE_ATOMIC128_RO likely(cpuinfo & CPUINFO_ATOMIC_VMOVDQA) |
42 | #define TCG_TARGET_EXTEND_ARGS 1 | 40 | +#define HAVE_ATOMIC128_RW 1 |
43 | -#endif | 41 | + |
44 | 42 | +static inline Int128 atomic16_read_ro(const Int128 *ptr) | |
45 | #if defined(__VIS__) && __VIS__ >= 0x300 | 43 | +{ |
46 | #define use_vis3_instructions 1 | 44 | + Int128Alias r; |
47 | diff --git a/tcg/tcg.c b/tcg/tcg.c | 45 | + |
48 | index XXXXXXX..XXXXXXX 100644 | 46 | + tcg_debug_assert(HAVE_ATOMIC128_RO); |
49 | --- a/tcg/tcg.c | 47 | + asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr)); |
50 | +++ b/tcg/tcg.c | 48 | + |
51 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | 49 | + return r.s; |
52 | } | 50 | +} |
53 | #endif | 51 | + |
54 | 52 | +static inline Int128 atomic16_read_rw(Int128 *ptr) | |
55 | -#if defined(__sparc__) && !defined(__arch64__) \ | 53 | +{ |
56 | - && !defined(CONFIG_TCG_INTERPRETER) | 54 | + __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16); |
57 | - /* We have 64-bit values in one register, but need to pass as two | 55 | + Int128Alias r; |
58 | - separate parameters. Split them. */ | 56 | + |
59 | - int orig_typemask = typemask; | 57 | + if (HAVE_ATOMIC128_RO) { |
60 | - int orig_nargs = nargs; | 58 | + asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr_align)); |
61 | - TCGv_i64 retl, reth; | 59 | + } else { |
62 | - TCGTemp *split_args[MAX_OPC_PARAM]; | 60 | + r.i = __sync_val_compare_and_swap_16(ptr_align, 0, 0); |
63 | - | 61 | + } |
64 | - retl = NULL; | 62 | + return r.s; |
65 | - reth = NULL; | 63 | +} |
66 | - typemask = 0; | 64 | + |
67 | - for (i = real_args = 0; i < nargs; ++i) { | 65 | +static inline void atomic16_set(Int128 *ptr, Int128 val) |
68 | - int argtype = extract32(orig_typemask, (i + 1) * 3, 3); | 66 | +{ |
69 | - bool is_64bit = (argtype & ~1) == dh_typecode_i64; | 67 | + __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16); |
70 | - | 68 | + Int128Alias new = { .s = val }; |
71 | - if (is_64bit) { | 69 | + |
72 | - TCGv_i64 orig = temp_tcgv_i64(args[i]); | 70 | + if (HAVE_ATOMIC128_RO) { |
73 | - TCGv_i32 h = tcg_temp_new_i32(); | 71 | + asm("vmovdqa %1, %0" : "=m"(*ptr_align) : "x" (new.i)); |
74 | - TCGv_i32 l = tcg_temp_new_i32(); | 72 | + } else { |
75 | - tcg_gen_extr_i64_i32(l, h, orig); | 73 | + __int128_t old; |
76 | - split_args[real_args++] = tcgv_i32_temp(h); | 74 | + do { |
77 | - typemask |= dh_typecode_i32 << (real_args * 3); | 75 | + old = *ptr_align; |
78 | - split_args[real_args++] = tcgv_i32_temp(l); | 76 | + } while (!__sync_bool_compare_and_swap_16(ptr_align, old, new.i)); |
79 | - typemask |= dh_typecode_i32 << (real_args * 3); | 77 | + } |
80 | - } else { | 78 | +} |
81 | - split_args[real_args++] = args[i]; | 79 | +#else |
82 | - typemask |= argtype << (real_args * 3); | 80 | +/* Provide QEMU_ERROR stubs. */ |
83 | - } | 81 | +#include "host/include/generic/host/atomic128-ldst.h" |
84 | - } | ||
85 | - nargs = real_args; | ||
86 | - args = split_args; | ||
87 | -#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 | ||
88 | +#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 | ||
89 | for (i = 0; i < nargs; ++i) { | ||
90 | int argtype = extract32(typemask, (i + 1) * 3, 3); | ||
91 | bool is_32bit = (argtype & ~1) == dh_typecode_i32; | ||
92 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
93 | |||
94 | pi = 0; | ||
95 | if (ret != NULL) { | ||
96 | -#if defined(__sparc__) && !defined(__arch64__) \ | ||
97 | - && !defined(CONFIG_TCG_INTERPRETER) | ||
98 | - if ((typemask & 6) == dh_typecode_i64) { | ||
99 | - /* The 32-bit ABI is going to return the 64-bit value in | ||
100 | - the %o0/%o1 register pair. Prepare for this by using | ||
101 | - two return temporaries, and reassemble below. */ | ||
102 | - retl = tcg_temp_new_i64(); | ||
103 | - reth = tcg_temp_new_i64(); | ||
104 | - op->args[pi++] = tcgv_i64_arg(reth); | ||
105 | - op->args[pi++] = tcgv_i64_arg(retl); | ||
106 | - nb_rets = 2; | ||
107 | - } else { | ||
108 | - op->args[pi++] = temp_arg(ret); | ||
109 | - nb_rets = 1; | ||
110 | - } | ||
111 | -#else | ||
112 | if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) { | ||
113 | #if HOST_BIG_ENDIAN | ||
114 | op->args[pi++] = temp_arg(ret + 1); | ||
115 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
116 | op->args[pi++] = temp_arg(ret); | ||
117 | nb_rets = 1; | ||
118 | } | ||
119 | -#endif | ||
120 | } else { | ||
121 | nb_rets = 0; | ||
122 | } | ||
123 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) | ||
124 | tcg_debug_assert(TCGOP_CALLI(op) == real_args); | ||
125 | tcg_debug_assert(pi <= ARRAY_SIZE(op->args)); | ||
126 | |||
127 | -#if defined(__sparc__) && !defined(__arch64__) \ | ||
128 | - && !defined(CONFIG_TCG_INTERPRETER) | ||
129 | - /* Free all of the parts we allocated above. */ | ||
130 | - for (i = real_args = 0; i < orig_nargs; ++i) { | ||
131 | - int argtype = extract32(orig_typemask, (i + 1) * 3, 3); | ||
132 | - bool is_64bit = (argtype & ~1) == dh_typecode_i64; | ||
133 | - | ||
134 | - if (is_64bit) { | ||
135 | - tcg_temp_free_internal(args[real_args++]); | ||
136 | - tcg_temp_free_internal(args[real_args++]); | ||
137 | - } else { | ||
138 | - real_args++; | ||
139 | - } | ||
140 | - } | ||
141 | - if ((orig_typemask & 6) == dh_typecode_i64) { | ||
142 | - /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them. | ||
143 | - Note that describing these as TCGv_i64 eliminates an unnecessary | ||
144 | - zero-extension that tcg_gen_concat_i32_i64 would create. */ | ||
145 | - tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth); | ||
146 | - tcg_temp_free_i64(retl); | ||
147 | - tcg_temp_free_i64(reth); | ||
148 | - } | ||
149 | -#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 | ||
150 | +#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 | ||
151 | for (i = 0; i < nargs; ++i) { | ||
152 | int argtype = extract32(typemask, (i + 1) * 3, 3); | ||
153 | bool is_32bit = (argtype & ~1) == dh_typecode_i32; | ||
154 | diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc | ||
155 | index XXXXXXX..XXXXXXX 100644 | ||
156 | --- a/tcg/sparc/tcg-target.c.inc | ||
157 | +++ b/tcg/sparc/tcg-target.c.inc | ||
158 | @@ -XXX,XX +XXX,XX @@ | ||
159 | * THE SOFTWARE. | ||
160 | */ | ||
161 | |||
162 | +/* We only support generating code for 64-bit mode. */ | ||
163 | +#ifndef __arch64__ | ||
164 | +#error "unsupported code generation mode" | ||
165 | +#endif | 82 | +#endif |
166 | + | 83 | + |
167 | #include "../tcg-pool.c.inc" | 84 | +#endif /* AARCH64_ATOMIC128_LDST_H */ |
168 | |||
169 | #ifdef CONFIG_DEBUG_TCG | ||
170 | @@ -XXX,XX +XXX,XX @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { | ||
171 | }; | ||
172 | #endif | ||
173 | |||
174 | -#ifdef __arch64__ | ||
175 | -# define SPARC64 1 | ||
176 | -#else | ||
177 | -# define SPARC64 0 | ||
178 | -#endif | ||
179 | - | ||
180 | #define TCG_CT_CONST_S11 0x100 | ||
181 | #define TCG_CT_CONST_S13 0x200 | ||
182 | #define TCG_CT_CONST_ZERO 0x400 | ||
183 | @@ -XXX,XX +XXX,XX @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { | ||
184 | * high bits of the %i and %l registers garbage at all times. | ||
185 | */ | ||
186 | #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) | ||
187 | -#if SPARC64 | ||
188 | # define ALL_GENERAL_REGS64 ALL_GENERAL_REGS | ||
189 | -#else | ||
190 | -# define ALL_GENERAL_REGS64 MAKE_64BIT_MASK(0, 16) | ||
191 | -#endif | ||
192 | #define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) | ||
193 | #define ALL_QLDST_REGS64 (ALL_GENERAL_REGS64 & ~SOFTMMU_RESERVE_REGS) | ||
194 | |||
195 | @@ -XXX,XX +XXX,XX @@ static bool check_fit_i32(int32_t val, unsigned int bits) | ||
196 | } | ||
197 | |||
198 | #define check_fit_tl check_fit_i64 | ||
199 | -#if SPARC64 | ||
200 | -# define check_fit_ptr check_fit_i64 | ||
201 | -#else | ||
202 | -# define check_fit_ptr check_fit_i32 | ||
203 | -#endif | ||
204 | +#define check_fit_ptr check_fit_i64 | ||
205 | |||
206 | static bool patch_reloc(tcg_insn_unit *src_rw, int type, | ||
207 | intptr_t value, intptr_t addend) | ||
208 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_sety(TCGContext *s, TCGReg rs) | ||
209 | tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); | ||
210 | } | ||
211 | |||
212 | -static void tcg_out_rdy(TCGContext *s, TCGReg rd) | ||
213 | -{ | ||
214 | - tcg_out32(s, RDY | INSN_RD(rd)); | ||
215 | -} | ||
216 | - | ||
217 | static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1, | ||
218 | int32_t val2, int val2const, int uns) | ||
219 | { | ||
220 | @@ -XXX,XX +XXX,XX @@ static void emit_extend(TCGContext *s, TCGReg r, int op) | ||
221 | tcg_out_arithi(s, r, r, 16, SHIFT_SRL); | ||
222 | break; | ||
223 | case MO_32: | ||
224 | - if (SPARC64) { | ||
225 | - tcg_out_arith(s, r, r, 0, SHIFT_SRL); | ||
226 | - } | ||
227 | + tcg_out_arith(s, r, r, 0, SHIFT_SRL); | ||
228 | break; | ||
229 | case MO_64: | ||
230 | break; | ||
231 | @@ -XXX,XX +XXX,XX @@ static void build_trampolines(TCGContext *s) | ||
232 | }; | ||
233 | |||
234 | int i; | ||
235 | - TCGReg ra; | ||
236 | |||
237 | for (i = 0; i < ARRAY_SIZE(qemu_ld_helpers); ++i) { | ||
238 | if (qemu_ld_helpers[i] == NULL) { | ||
239 | @@ -XXX,XX +XXX,XX @@ static void build_trampolines(TCGContext *s) | ||
240 | } | ||
241 | qemu_ld_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr); | ||
242 | |||
243 | - if (SPARC64 || TARGET_LONG_BITS == 32) { | ||
244 | - ra = TCG_REG_O3; | ||
245 | - } else { | ||
246 | - /* Install the high part of the address. */ | ||
247 | - tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX); | ||
248 | - ra = TCG_REG_O4; | ||
249 | - } | ||
250 | - | ||
251 | /* Set the retaddr operand. */ | ||
252 | - tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); | ||
253 | + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O3, TCG_REG_O7); | ||
254 | /* Tail call. */ | ||
255 | tcg_out_jmpl_const(s, qemu_ld_helpers[i], true, true); | ||
256 | /* delay slot -- set the env argument */ | ||
257 | @@ -XXX,XX +XXX,XX @@ static void build_trampolines(TCGContext *s) | ||
258 | } | ||
259 | qemu_st_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr); | ||
260 | |||
261 | - if (SPARC64) { | ||
262 | - emit_extend(s, TCG_REG_O2, i); | ||
263 | - ra = TCG_REG_O4; | ||
264 | - } else { | ||
265 | - ra = TCG_REG_O1; | ||
266 | - if (TARGET_LONG_BITS == 64) { | ||
267 | - /* Install the high part of the address. */ | ||
268 | - tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); | ||
269 | - ra += 2; | ||
270 | - } else { | ||
271 | - ra += 1; | ||
272 | - } | ||
273 | - if ((i & MO_SIZE) == MO_64) { | ||
274 | - /* Install the high part of the data. */ | ||
275 | - tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); | ||
276 | - ra += 2; | ||
277 | - } else { | ||
278 | - emit_extend(s, ra, i); | ||
279 | - ra += 1; | ||
280 | - } | ||
281 | - /* Skip the oi argument. */ | ||
282 | - ra += 1; | ||
283 | - } | ||
284 | - | ||
285 | + emit_extend(s, TCG_REG_O2, i); | ||
286 | + | ||
287 | /* Set the retaddr operand. */ | ||
288 | - if (ra >= TCG_REG_O6) { | ||
289 | - tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK, | ||
290 | - TCG_TARGET_CALL_STACK_OFFSET); | ||
291 | - } else { | ||
292 | - tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); | ||
293 | - } | ||
294 | + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O4, TCG_REG_O7); | ||
295 | |||
296 | /* Tail call. */ | ||
297 | tcg_out_jmpl_const(s, qemu_st_helpers[i], true, true); | ||
298 | @@ -XXX,XX +XXX,XX @@ static void build_trampolines(TCGContext *s) | ||
299 | qemu_unalign_st_trampoline = tcg_splitwx_to_rx(s->code_ptr); | ||
300 | } | ||
301 | |||
302 | - if (!SPARC64 && TARGET_LONG_BITS == 64) { | ||
303 | - /* Install the high part of the address. */ | ||
304 | - tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX); | ||
305 | - } | ||
306 | - | ||
307 | /* Tail call. */ | ||
308 | tcg_out_jmpl_const(s, helper, true, true); | ||
309 | /* delay slot -- set the env argument */ | ||
310 | @@ -XXX,XX +XXX,XX @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, | ||
311 | tcg_out_cmp(s, r0, r2, 0); | ||
312 | |||
313 | /* If the guest address must be zero-extended, do so now. */ | ||
314 | - if (SPARC64 && TARGET_LONG_BITS == 32) { | ||
315 | + if (TARGET_LONG_BITS == 32) { | ||
316 | tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL); | ||
317 | return r0; | ||
318 | } | ||
319 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, | ||
320 | |||
321 | #ifdef CONFIG_SOFTMMU | ||
322 | unsigned memi = get_mmuidx(oi); | ||
323 | - TCGReg addrz, param; | ||
324 | + TCGReg addrz; | ||
325 | const tcg_insn_unit *func; | ||
326 | |||
327 | addrz = tcg_out_tlb_load(s, addr, memi, memop, | ||
328 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, | ||
329 | |||
330 | /* TLB Miss. */ | ||
331 | |||
332 | - param = TCG_REG_O1; | ||
333 | - if (!SPARC64 && TARGET_LONG_BITS == 64) { | ||
334 | - /* Skip the high-part; we'll perform the extract in the trampoline. */ | ||
335 | - param++; | ||
336 | - } | ||
337 | - tcg_out_mov(s, TCG_TYPE_REG, param++, addrz); | ||
338 | + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz); | ||
339 | |||
340 | /* We use the helpers to extend SB and SW data, leaving the case | ||
341 | of SL needing explicit extending below. */ | ||
342 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, | ||
343 | tcg_debug_assert(func != NULL); | ||
344 | tcg_out_call_nodelay(s, func, false); | ||
345 | /* delay slot */ | ||
346 | - tcg_out_movi(s, TCG_TYPE_I32, param, oi); | ||
347 | + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O2, oi); | ||
348 | |||
349 | - /* Recall that all of the helpers return 64-bit results. | ||
350 | - Which complicates things for sparcv8plus. */ | ||
351 | - if (SPARC64) { | ||
352 | - /* We let the helper sign-extend SB and SW, but leave SL for here. */ | ||
353 | - if (is_64 && (memop & MO_SSIZE) == MO_SL) { | ||
354 | - tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA); | ||
355 | - } else { | ||
356 | - tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0); | ||
357 | - } | ||
358 | + /* We let the helper sign-extend SB and SW, but leave SL for here. */ | ||
359 | + if (is_64 && (memop & MO_SSIZE) == MO_SL) { | ||
360 | + tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA); | ||
361 | } else { | ||
362 | - if ((memop & MO_SIZE) == MO_64) { | ||
363 | - tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX); | ||
364 | - tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL); | ||
365 | - tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR); | ||
366 | - } else if (is_64) { | ||
367 | - /* Re-extend from 32-bit rather than reassembling when we | ||
368 | - know the high register must be an extension. */ | ||
369 | - tcg_out_arithi(s, data, TCG_REG_O1, 0, | ||
370 | - memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL); | ||
371 | - } else { | ||
372 | - tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1); | ||
373 | - } | ||
374 | + tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0); | ||
375 | } | ||
376 | |||
377 | *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); | ||
378 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, | ||
379 | unsigned s_bits = memop & MO_SIZE; | ||
380 | unsigned t_bits; | ||
381 | |||
382 | - if (SPARC64 && TARGET_LONG_BITS == 32) { | ||
383 | + if (TARGET_LONG_BITS == 32) { | ||
384 | tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); | ||
385 | addr = TCG_REG_T1; | ||
386 | } | ||
387 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, | ||
388 | * operation in the delay slot, and failure need only invoke the | ||
389 | * handler for SIGBUS. | ||
390 | */ | ||
391 | - TCGReg arg_low = TCG_REG_O1 + (!SPARC64 && TARGET_LONG_BITS == 64); | ||
392 | tcg_out_call_nodelay(s, qemu_unalign_ld_trampoline, false); | ||
393 | /* delay slot -- move to low part of argument reg */ | ||
394 | - tcg_out_mov_delay(s, arg_low, addr); | ||
395 | + tcg_out_mov_delay(s, TCG_REG_O1, addr); | ||
396 | } else { | ||
397 | /* Underalignment: load by pieces of minimum alignment. */ | ||
398 | int ld_opc, a_size, s_size, i; | ||
399 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, | ||
400 | |||
401 | #ifdef CONFIG_SOFTMMU | ||
402 | unsigned memi = get_mmuidx(oi); | ||
403 | - TCGReg addrz, param; | ||
404 | + TCGReg addrz; | ||
405 | const tcg_insn_unit *func; | ||
406 | |||
407 | addrz = tcg_out_tlb_load(s, addr, memi, memop, | ||
408 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, | ||
409 | |||
410 | /* TLB Miss. */ | ||
411 | |||
412 | - param = TCG_REG_O1; | ||
413 | - if (!SPARC64 && TARGET_LONG_BITS == 64) { | ||
414 | - /* Skip the high-part; we'll perform the extract in the trampoline. */ | ||
415 | - param++; | ||
416 | - } | ||
417 | - tcg_out_mov(s, TCG_TYPE_REG, param++, addrz); | ||
418 | - if (!SPARC64 && (memop & MO_SIZE) == MO_64) { | ||
419 | - /* Skip the high-part; we'll perform the extract in the trampoline. */ | ||
420 | - param++; | ||
421 | - } | ||
422 | - tcg_out_mov(s, TCG_TYPE_REG, param++, data); | ||
423 | + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz); | ||
424 | + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O2, data); | ||
425 | |||
426 | func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)]; | ||
427 | tcg_debug_assert(func != NULL); | ||
428 | tcg_out_call_nodelay(s, func, false); | ||
429 | /* delay slot */ | ||
430 | - tcg_out_movi(s, TCG_TYPE_I32, param, oi); | ||
431 | + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O3, oi); | ||
432 | |||
433 | *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); | ||
434 | #else | ||
435 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, | ||
436 | unsigned s_bits = memop & MO_SIZE; | ||
437 | unsigned t_bits; | ||
438 | |||
439 | - if (SPARC64 && TARGET_LONG_BITS == 32) { | ||
440 | + if (TARGET_LONG_BITS == 32) { | ||
441 | tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); | ||
442 | addr = TCG_REG_T1; | ||
443 | } | ||
444 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, | ||
445 | * operation in the delay slot, and failure need only invoke the | ||
446 | * handler for SIGBUS. | ||
447 | */ | ||
448 | - TCGReg arg_low = TCG_REG_O1 + (!SPARC64 && TARGET_LONG_BITS == 64); | ||
449 | tcg_out_call_nodelay(s, qemu_unalign_st_trampoline, false); | ||
450 | /* delay slot -- move to low part of argument reg */ | ||
451 | - tcg_out_mov_delay(s, arg_low, addr); | ||
452 | + tcg_out_mov_delay(s, TCG_REG_O1, addr); | ||
453 | } else { | ||
454 | /* Underalignment: store by pieces of minimum alignment. */ | ||
455 | int st_opc, a_size, s_size, i; | ||
456 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
457 | case INDEX_op_muls2_i32: | ||
458 | c = ARITH_SMUL; | ||
459 | do_mul2: | ||
460 | - /* The 32-bit multiply insns produce a full 64-bit result. If the | ||
461 | - destination register can hold it, we can avoid the slower RDY. */ | ||
462 | + /* The 32-bit multiply insns produce a full 64-bit result. */ | ||
463 | tcg_out_arithc(s, a0, a2, args[3], const_args[3], c); | ||
464 | - if (SPARC64 || a0 <= TCG_REG_O7) { | ||
465 | - tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); | ||
466 | - } else { | ||
467 | - tcg_out_rdy(s, a1); | ||
468 | - } | ||
469 | + tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); | ||
470 | break; | ||
471 | |||
472 | case INDEX_op_qemu_ld_i32: | ||
473 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
474 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */ | ||
475 | } | ||
476 | |||
477 | -#if SPARC64 | ||
478 | -# define ELF_HOST_MACHINE EM_SPARCV9 | ||
479 | -#else | ||
480 | -# define ELF_HOST_MACHINE EM_SPARC32PLUS | ||
481 | -# define ELF_HOST_FLAGS EF_SPARC_32PLUS | ||
482 | -#endif | ||
483 | +#define ELF_HOST_MACHINE EM_SPARCV9 | ||
484 | |||
485 | typedef struct { | ||
486 | DebugFrameHeader h; | ||
487 | - uint8_t fde_def_cfa[SPARC64 ? 4 : 2]; | ||
488 | + uint8_t fde_def_cfa[4]; | ||
489 | uint8_t fde_win_save; | ||
490 | uint8_t fde_ret_save[3]; | ||
491 | } DebugFrame; | ||
492 | @@ -XXX,XX +XXX,XX @@ static const DebugFrame debug_frame = { | ||
493 | .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), | ||
494 | |||
495 | .fde_def_cfa = { | ||
496 | -#if SPARC64 | ||
497 | 12, 30, /* DW_CFA_def_cfa i6, 2047 */ | ||
498 | (2047 & 0x7f) | 0x80, (2047 >> 7) | ||
499 | -#else | ||
500 | - 13, 30 /* DW_CFA_def_cfa_register i6 */ | ||
501 | -#endif | ||
502 | }, | ||
503 | .fde_win_save = 0x2d, /* DW_CFA_GNU_window_save */ | ||
504 | .fde_ret_save = { 9, 15, 31 }, /* DW_CFA_register o7, i7 */ | ||
505 | -- | 85 | -- |
506 | 2.34.1 | 86 | 2.34.1 |
507 | 87 | ||
508 | 88 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
3 | --- | ||
4 | tcg/i386/tcg-target.h | 4 +- | ||
5 | tcg/i386/tcg-target.c.inc | 191 +++++++++++++++++++++++++++++++++++++- | ||
6 | 2 files changed, 190 insertions(+), 5 deletions(-) | ||
1 | 7 | ||
8 | diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h | ||
9 | index XXXXXXX..XXXXXXX 100644 | ||
10 | --- a/tcg/i386/tcg-target.h | ||
11 | +++ b/tcg/i386/tcg-target.h | ||
12 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
13 | #define have_avx1 (cpuinfo & CPUINFO_AVX1) | ||
14 | #define have_avx2 (cpuinfo & CPUINFO_AVX2) | ||
15 | #define have_movbe (cpuinfo & CPUINFO_MOVBE) | ||
16 | -#define have_atomic16 (cpuinfo & CPUINFO_ATOMIC_VMOVDQA) | ||
17 | |||
18 | /* | ||
19 | * There are interesting instructions in AVX512, so long as we have AVX512VL, | ||
20 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
21 | #define TCG_TARGET_HAS_qemu_st8_i32 1 | ||
22 | #endif | ||
23 | |||
24 | -#define TCG_TARGET_HAS_qemu_ldst_i128 0 | ||
25 | +#define TCG_TARGET_HAS_qemu_ldst_i128 \ | ||
26 | + (TCG_TARGET_REG_BITS == 64 && (cpuinfo & CPUINFO_ATOMIC_VMOVDQA)) | ||
27 | |||
28 | /* We do not support older SSE systems, only beginning with AVX1. */ | ||
29 | #define TCG_TARGET_HAS_v64 have_avx1 | ||
30 | diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/tcg/i386/tcg-target.c.inc | ||
33 | +++ b/tcg/i386/tcg-target.c.inc | ||
34 | @@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = { | ||
35 | #endif | ||
36 | }; | ||
37 | |||
38 | +#define TCG_TMP_VEC TCG_REG_XMM5 | ||
39 | + | ||
40 | static const int tcg_target_call_iarg_regs[] = { | ||
41 | #if TCG_TARGET_REG_BITS == 64 | ||
42 | #if defined(_WIN64) | ||
43 | @@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) | ||
44 | #define OPC_PCMPGTW (0x65 | P_EXT | P_DATA16) | ||
45 | #define OPC_PCMPGTD (0x66 | P_EXT | P_DATA16) | ||
46 | #define OPC_PCMPGTQ (0x37 | P_EXT38 | P_DATA16) | ||
47 | +#define OPC_PEXTRD (0x16 | P_EXT3A | P_DATA16) | ||
48 | +#define OPC_PINSRD (0x22 | P_EXT3A | P_DATA16) | ||
49 | #define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16) | ||
50 | #define OPC_PMAXSW (0xee | P_EXT | P_DATA16) | ||
51 | #define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16) | ||
52 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
53 | |||
54 | bool tcg_target_has_memory_bswap(MemOp memop) | ||
55 | { | ||
56 | - return have_movbe; | ||
57 | + TCGAtomAlign aa; | ||
58 | + | ||
59 | + if (!have_movbe) { | ||
60 | + return false; | ||
61 | + } | ||
62 | + if ((memop & MO_SIZE) < MO_128) { | ||
63 | + return true; | ||
64 | + } | ||
65 | + | ||
66 | + /* | ||
67 | + * Reject 16-byte memop with 16-byte atomicity, i.e. VMOVDQA, | ||
68 | + * but do allow a pair of 64-bit operations, i.e. MOVBEQ. | ||
69 | + */ | ||
70 | + aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true); | ||
71 | + return aa.atom < MO_128; | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | @@ -XXX,XX +XXX,XX @@ static const TCGLdstHelperParam ldst_helper_param = { | ||
76 | static const TCGLdstHelperParam ldst_helper_param = { }; | ||
77 | #endif | ||
78 | |||
79 | +static void tcg_out_vec_to_pair(TCGContext *s, TCGType type, | ||
80 | + TCGReg l, TCGReg h, TCGReg v) | ||
81 | +{ | ||
82 | + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; | ||
83 | + | ||
84 | + /* vpmov{d,q} %v, %l */ | ||
85 | + tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, v, 0, l); | ||
86 | + /* vpextr{d,q} $1, %v, %h */ | ||
87 | + tcg_out_vex_modrm(s, OPC_PEXTRD + rexw, v, 0, h); | ||
88 | + tcg_out8(s, 1); | ||
89 | +} | ||
90 | + | ||
91 | +static void tcg_out_pair_to_vec(TCGContext *s, TCGType type, | ||
92 | + TCGReg v, TCGReg l, TCGReg h) | ||
93 | +{ | ||
94 | + int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW; | ||
95 | + | ||
96 | + /* vmov{d,q} %l, %v */ | ||
97 | + tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, v, 0, l); | ||
98 | + /* vpinsr{d,q} $1, %h, %v, %v */ | ||
99 | + tcg_out_vex_modrm(s, OPC_PINSRD + rexw, v, v, h); | ||
100 | + tcg_out8(s, 1); | ||
101 | +} | ||
102 | + | ||
103 | /* | ||
104 | * Generate code for the slow path for a load at the end of block | ||
105 | */ | ||
106 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, | ||
107 | { | ||
108 | TCGLabelQemuLdst *ldst = NULL; | ||
109 | MemOp opc = get_memop(oi); | ||
110 | + MemOp s_bits = opc & MO_SIZE; | ||
111 | unsigned a_mask; | ||
112 | |||
113 | #ifdef CONFIG_SOFTMMU | ||
114 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, | ||
115 | *h = x86_guest_base; | ||
116 | #endif | ||
117 | h->base = addrlo; | ||
118 | - h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false); | ||
119 | + h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128); | ||
120 | a_mask = (1 << h->aa.align) - 1; | ||
121 | |||
122 | #ifdef CONFIG_SOFTMMU | ||
123 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, | ||
124 | TCGType tlbtype = TCG_TYPE_I32; | ||
125 | int trexw = 0, hrexw = 0, tlbrexw = 0; | ||
126 | unsigned mem_index = get_mmuidx(oi); | ||
127 | - unsigned s_bits = opc & MO_SIZE; | ||
128 | unsigned s_mask = (1 << s_bits) - 1; | ||
129 | int tlb_mask; | ||
130 | |||
131 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, | ||
132 | h.base, h.index, 0, h.ofs + 4); | ||
133 | } | ||
134 | break; | ||
135 | + | ||
136 | + case MO_128: | ||
137 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
138 | + | ||
139 | + /* | ||
140 | + * Without 16-byte atomicity, use integer regs. | ||
141 | + * That is where we want the data, and it allows bswaps. | ||
142 | + */ | ||
143 | + if (h.aa.atom < MO_128) { | ||
144 | + if (use_movbe) { | ||
145 | + TCGReg t = datalo; | ||
146 | + datalo = datahi; | ||
147 | + datahi = t; | ||
148 | + } | ||
149 | + if (h.base == datalo || h.index == datalo) { | ||
150 | + tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, datahi, | ||
151 | + h.base, h.index, 0, h.ofs); | ||
152 | + tcg_out_modrm_offset(s, movop + P_REXW + h.seg, | ||
153 | + datalo, datahi, 0); | ||
154 | + tcg_out_modrm_offset(s, movop + P_REXW + h.seg, | ||
155 | + datahi, datahi, 8); | ||
156 | + } else { | ||
157 | + tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo, | ||
158 | + h.base, h.index, 0, h.ofs); | ||
159 | + tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi, | ||
160 | + h.base, h.index, 0, h.ofs + 8); | ||
161 | + } | ||
162 | + break; | ||
163 | + } | ||
164 | + | ||
165 | + /* | ||
166 | + * With 16-byte atomicity, a vector load is required. | ||
167 | + * If we already have 16-byte alignment, then VMOVDQA always works. | ||
168 | + * Else if VMOVDQU has atomicity with dynamic alignment, use that. | ||
169 | + * Else we require a runtime test for alignment for VMOVDQA; | ||
170 | + * use VMOVDQU on the unaligned nonatomic path for simplicity. | ||
171 | + */ | ||
172 | + if (h.aa.align >= MO_128) { | ||
173 | + tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg, | ||
174 | + TCG_TMP_VEC, 0, | ||
175 | + h.base, h.index, 0, h.ofs); | ||
176 | + } else if (cpuinfo & CPUINFO_ATOMIC_VMOVDQU) { | ||
177 | + tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_VxWx + h.seg, | ||
178 | + TCG_TMP_VEC, 0, | ||
179 | + h.base, h.index, 0, h.ofs); | ||
180 | + } else { | ||
181 | + TCGLabel *l1 = gen_new_label(); | ||
182 | + TCGLabel *l2 = gen_new_label(); | ||
183 | + | ||
184 | + tcg_out_testi(s, h.base, 15); | ||
185 | + tcg_out_jxx(s, JCC_JNE, l1, true); | ||
186 | + | ||
187 | + tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg, | ||
188 | + TCG_TMP_VEC, 0, | ||
189 | + h.base, h.index, 0, h.ofs); | ||
190 | + tcg_out_jxx(s, JCC_JMP, l2, true); | ||
191 | + | ||
192 | + tcg_out_label(s, l1); | ||
193 | + tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_VxWx + h.seg, | ||
194 | + TCG_TMP_VEC, 0, | ||
195 | + h.base, h.index, 0, h.ofs); | ||
196 | + tcg_out_label(s, l2); | ||
197 | + } | ||
198 | + tcg_out_vec_to_pair(s, TCG_TYPE_I64, datalo, datahi, TCG_TMP_VEC); | ||
199 | + break; | ||
200 | + | ||
201 | default: | ||
202 | g_assert_not_reached(); | ||
203 | } | ||
204 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, | ||
205 | h.base, h.index, 0, h.ofs + 4); | ||
206 | } | ||
207 | break; | ||
208 | + | ||
209 | + case MO_128: | ||
210 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
211 | + | ||
212 | + /* | ||
213 | + * Without 16-byte atomicity, use integer regs. | ||
214 | + * That is where we have the data, and it allows bswaps. | ||
215 | + */ | ||
216 | + if (h.aa.atom < MO_128) { | ||
217 | + if (use_movbe) { | ||
218 | + TCGReg t = datalo; | ||
219 | + datalo = datahi; | ||
220 | + datahi = t; | ||
221 | + } | ||
222 | + tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo, | ||
223 | + h.base, h.index, 0, h.ofs); | ||
224 | + tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi, | ||
225 | + h.base, h.index, 0, h.ofs + 8); | ||
226 | + break; | ||
227 | + } | ||
228 | + | ||
229 | + /* | ||
230 | + * With 16-byte atomicity, a vector store is required. | ||
231 | + * If we already have 16-byte alignment, then VMOVDQA always works. | ||
232 | + * Else if VMOVDQU has atomicity with dynamic alignment, use that. | ||
233 | + * Else we require a runtime test for alignment for VMOVDQA; | ||
234 | + * use VMOVDQU on the unaligned nonatomic path for simplicity. | ||
235 | + */ | ||
236 | + tcg_out_pair_to_vec(s, TCG_TYPE_I64, TCG_TMP_VEC, datalo, datahi); | ||
237 | + if (h.aa.align >= MO_128) { | ||
238 | + tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg, | ||
239 | + TCG_TMP_VEC, 0, | ||
240 | + h.base, h.index, 0, h.ofs); | ||
241 | + } else if (cpuinfo & CPUINFO_ATOMIC_VMOVDQU) { | ||
242 | + tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_WxVx + h.seg, | ||
243 | + TCG_TMP_VEC, 0, | ||
244 | + h.base, h.index, 0, h.ofs); | ||
245 | + } else { | ||
246 | + TCGLabel *l1 = gen_new_label(); | ||
247 | + TCGLabel *l2 = gen_new_label(); | ||
248 | + | ||
249 | + tcg_out_testi(s, h.base, 15); | ||
250 | + tcg_out_jxx(s, JCC_JNE, l1, true); | ||
251 | + | ||
252 | + tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg, | ||
253 | + TCG_TMP_VEC, 0, | ||
254 | + h.base, h.index, 0, h.ofs); | ||
255 | + tcg_out_jxx(s, JCC_JMP, l2, true); | ||
256 | + | ||
257 | + tcg_out_label(s, l1); | ||
258 | + tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_WxVx + h.seg, | ||
259 | + TCG_TMP_VEC, 0, | ||
260 | + h.base, h.index, 0, h.ofs); | ||
261 | + tcg_out_label(s, l2); | ||
262 | + } | ||
263 | + break; | ||
264 | + | ||
265 | default: | ||
266 | g_assert_not_reached(); | ||
267 | } | ||
268 | @@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
269 | tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64); | ||
270 | } | ||
271 | break; | ||
272 | + case INDEX_op_qemu_ld_a32_i128: | ||
273 | + case INDEX_op_qemu_ld_a64_i128: | ||
274 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
275 | + tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128); | ||
276 | + break; | ||
277 | |||
278 | case INDEX_op_qemu_st_a64_i32: | ||
279 | case INDEX_op_qemu_st8_a64_i32: | ||
280 | @@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
281 | tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64); | ||
282 | } | ||
283 | break; | ||
284 | + case INDEX_op_qemu_st_a32_i128: | ||
285 | + case INDEX_op_qemu_st_a64_i128: | ||
286 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
287 | + tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128); | ||
288 | + break; | ||
289 | |||
290 | OP_32_64(mulu2): | ||
291 | tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]); | ||
292 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | ||
293 | case INDEX_op_qemu_st_a64_i64: | ||
294 | return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I4(L, L, L, L); | ||
295 | |||
296 | + case INDEX_op_qemu_ld_a32_i128: | ||
297 | + case INDEX_op_qemu_ld_a64_i128: | ||
298 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
299 | + return C_O2_I1(r, r, L); | ||
300 | + case INDEX_op_qemu_st_a32_i128: | ||
301 | + case INDEX_op_qemu_st_a64_i128: | ||
302 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); | ||
303 | + return C_O0_I3(L, L, L); | ||
304 | + | ||
305 | case INDEX_op_brcond2_i32: | ||
306 | return C_O0_I4(r, r, ri, ri); | ||
307 | |||
308 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
309 | |||
310 | s->reserved_regs = 0; | ||
311 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); | ||
312 | + tcg_regset_set_reg(s->reserved_regs, TCG_TMP_VEC); | ||
313 | #ifdef _WIN64 | ||
314 | /* These are call saved, and we don't save them, so don't use them. */ | ||
315 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM6); | ||
316 | -- | ||
317 | 2.34.1 | diff view generated by jsdifflib |
1 | The value passed is always true, and if the target's | 1 | We will need to allocate a second general-purpose temporary. |
---|---|---|---|
2 | synchronize_from_tb hook is non-trivial, not exiting | 2 | Rename the existing temps to add a distinguishing number. |
3 | may be erroneous. | ||
4 | 3 | ||
5 | Reviewed-by: Claudio Fontana <cfontana@suse.de> | 4 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | --- | 6 | --- |
8 | include/exec/exec-all.h | 5 +---- | 7 | tcg/aarch64/tcg-target.c.inc | 50 ++++++++++++++++++------------------ |
9 | accel/tcg/cpu-exec-common.c | 2 +- | 8 | 1 file changed, 25 insertions(+), 25 deletions(-) |
10 | accel/tcg/translate-all.c | 12 ++---------- | ||
11 | target/alpha/helper.c | 2 +- | ||
12 | target/alpha/mem_helper.c | 2 +- | ||
13 | target/arm/op_helper.c | 2 +- | ||
14 | target/arm/tlb_helper.c | 8 ++++---- | ||
15 | target/cris/helper.c | 2 +- | ||
16 | target/i386/tcg/sysemu/svm_helper.c | 2 +- | ||
17 | target/m68k/op_helper.c | 4 ++-- | ||
18 | target/microblaze/helper.c | 2 +- | ||
19 | target/nios2/op_helper.c | 2 +- | ||
20 | target/openrisc/sys_helper.c | 4 ++-- | ||
21 | target/ppc/excp_helper.c | 2 +- | ||
22 | target/s390x/tcg/excp_helper.c | 2 +- | ||
23 | target/tricore/op_helper.c | 2 +- | ||
24 | target/xtensa/helper.c | 6 +++--- | ||
25 | 17 files changed, 25 insertions(+), 36 deletions(-) | ||
26 | 9 | ||
27 | diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h | 10 | diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc |
28 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/include/exec/exec-all.h | 12 | --- a/tcg/aarch64/tcg-target.c.inc |
30 | +++ b/include/exec/exec-all.h | 13 | +++ b/tcg/aarch64/tcg-target.c.inc |
31 | @@ -XXX,XX +XXX,XX @@ bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data); | 14 | @@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) |
32 | * cpu_restore_state: | 15 | return TCG_REG_X0 + slot; |
33 | * @cpu: the cpu context | 16 | } |
34 | * @host_pc: the host pc within the translation | 17 | |
35 | - * @will_exit: true if the TB executed will be interrupted after some | 18 | -#define TCG_REG_TMP TCG_REG_X30 |
36 | - cpu adjustments. Required for maintaining the correct | 19 | -#define TCG_VEC_TMP TCG_REG_V31 |
37 | - icount valus | 20 | +#define TCG_REG_TMP0 TCG_REG_X30 |
38 | * @return: true if state was restored, false otherwise | 21 | +#define TCG_VEC_TMP0 TCG_REG_V31 |
39 | * | 22 | |
40 | * Attempt to restore the state for a fault occurring in translated | 23 | #ifndef CONFIG_SOFTMMU |
41 | * code. If @host_pc is not in translated code no state is | 24 | #define TCG_REG_GUEST_BASE TCG_REG_X28 |
42 | * restored and the function returns false. | 25 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, |
43 | */ | 26 | static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, |
44 | -bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit); | 27 | TCGReg r, TCGReg base, intptr_t offset) |
45 | +bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc); | ||
46 | |||
47 | G_NORETURN void cpu_loop_exit_noexc(CPUState *cpu); | ||
48 | G_NORETURN void cpu_loop_exit(CPUState *cpu); | ||
49 | diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/accel/tcg/cpu-exec-common.c | ||
52 | +++ b/accel/tcg/cpu-exec-common.c | ||
53 | @@ -XXX,XX +XXX,XX @@ void cpu_loop_exit(CPUState *cpu) | ||
54 | void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc) | ||
55 | { | 28 | { |
56 | if (pc) { | 29 | - TCGReg temp = TCG_REG_TMP; |
57 | - cpu_restore_state(cpu, pc, true); | 30 | + TCGReg temp = TCG_REG_TMP0; |
58 | + cpu_restore_state(cpu, pc); | 31 | |
32 | if (offset < -0xffffff || offset > 0xffffff) { | ||
33 | tcg_out_movi(s, TCG_TYPE_PTR, temp, offset); | ||
34 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd, | ||
59 | } | 35 | } |
60 | cpu_loop_exit(cpu); | 36 | |
37 | /* Worst-case scenario, move offset to temp register, use reg offset. */ | ||
38 | - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); | ||
39 | - tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); | ||
40 | + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset); | ||
41 | + tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0); | ||
61 | } | 42 | } |
62 | diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c | 43 | |
63 | index XXXXXXX..XXXXXXX 100644 | 44 | static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) |
64 | --- a/accel/tcg/translate-all.c | 45 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target) |
65 | +++ b/accel/tcg/translate-all.c | 46 | if (offset == sextract64(offset, 0, 26)) { |
66 | @@ -XXX,XX +XXX,XX @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, | 47 | tcg_out_insn(s, 3206, BL, offset); |
67 | #endif | ||
68 | } | ||
69 | |||
70 | -bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) | ||
71 | +bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc) | ||
72 | { | ||
73 | - /* | ||
74 | - * The pc update associated with restore without exit will | ||
75 | - * break the relative pc adjustments performed by TARGET_TB_PCREL. | ||
76 | - */ | ||
77 | - if (TARGET_TB_PCREL) { | ||
78 | - assert(will_exit); | ||
79 | - } | ||
80 | - | ||
81 | /* | ||
82 | * The host_pc has to be in the rx region of the code buffer. | ||
83 | * If it is not we will not be able to resolve it here. | ||
84 | @@ -XXX,XX +XXX,XX @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) | ||
85 | if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { | ||
86 | TranslationBlock *tb = tcg_tb_lookup(host_pc); | ||
87 | if (tb) { | ||
88 | - cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit); | ||
89 | + cpu_restore_state_from_tb(cpu, tb, host_pc, true); | ||
90 | return true; | ||
91 | } | ||
92 | } | ||
93 | diff --git a/target/alpha/helper.c b/target/alpha/helper.c | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/target/alpha/helper.c | ||
96 | +++ b/target/alpha/helper.c | ||
97 | @@ -XXX,XX +XXX,XX @@ G_NORETURN void dynamic_excp(CPUAlphaState *env, uintptr_t retaddr, | ||
98 | cs->exception_index = excp; | ||
99 | env->error_code = error; | ||
100 | if (retaddr) { | ||
101 | - cpu_restore_state(cs, retaddr, true); | ||
102 | + cpu_restore_state(cs, retaddr); | ||
103 | /* Floating-point exceptions (our only users) point to the next PC. */ | ||
104 | env->pc += 4; | ||
105 | } | ||
106 | diff --git a/target/alpha/mem_helper.c b/target/alpha/mem_helper.c | ||
107 | index XXXXXXX..XXXXXXX 100644 | ||
108 | --- a/target/alpha/mem_helper.c | ||
109 | +++ b/target/alpha/mem_helper.c | ||
110 | @@ -XXX,XX +XXX,XX @@ static void do_unaligned_access(CPUAlphaState *env, vaddr addr, uintptr_t retadd | ||
111 | uint64_t pc; | ||
112 | uint32_t insn; | ||
113 | |||
114 | - cpu_restore_state(env_cpu(env), retaddr, true); | ||
115 | + cpu_restore_state(env_cpu(env), retaddr); | ||
116 | |||
117 | pc = env->pc; | ||
118 | insn = cpu_ldl_code(env, pc); | ||
119 | diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c | ||
120 | index XXXXXXX..XXXXXXX 100644 | ||
121 | --- a/target/arm/op_helper.c | ||
122 | +++ b/target/arm/op_helper.c | ||
123 | @@ -XXX,XX +XXX,XX @@ void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome, | ||
124 | * we must restore CPU state here before setting the syndrome | ||
125 | * the caller passed us, and cannot use cpu_loop_exit_restore(). | ||
126 | */ | ||
127 | - cpu_restore_state(cs, ra, true); | ||
128 | + cpu_restore_state(cs, ra); | ||
129 | raise_exception(env, excp, syndrome, target_el); | ||
130 | } | ||
131 | |||
132 | diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c | ||
133 | index XXXXXXX..XXXXXXX 100644 | ||
134 | --- a/target/arm/tlb_helper.c | ||
135 | +++ b/target/arm/tlb_helper.c | ||
136 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, | ||
137 | ARMMMUFaultInfo fi = {}; | ||
138 | |||
139 | /* now we have a real cpu fault */ | ||
140 | - cpu_restore_state(cs, retaddr, true); | ||
141 | + cpu_restore_state(cs, retaddr); | ||
142 | |||
143 | fi.type = ARMFault_Alignment; | ||
144 | arm_deliver_fault(cpu, vaddr, access_type, mmu_idx, &fi); | ||
145 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, | ||
146 | ARMMMUFaultInfo fi = {}; | ||
147 | |||
148 | /* now we have a real cpu fault */ | ||
149 | - cpu_restore_state(cs, retaddr, true); | ||
150 | + cpu_restore_state(cs, retaddr); | ||
151 | |||
152 | fi.ea = arm_extabort_type(response); | ||
153 | fi.type = ARMFault_SyncExternal; | ||
154 | @@ -XXX,XX +XXX,XX @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, | ||
155 | return false; | ||
156 | } else { | 48 | } else { |
157 | /* now we have a real cpu fault */ | 49 | - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target); |
158 | - cpu_restore_state(cs, retaddr, true); | 50 | - tcg_out_insn(s, 3207, BLR, TCG_REG_TMP); |
159 | + cpu_restore_state(cs, retaddr); | 51 | + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target); |
160 | arm_deliver_fault(cpu, address, access_type, mmu_idx, fi); | 52 | + tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0); |
161 | } | 53 | } |
162 | } | 54 | } |
163 | @@ -XXX,XX +XXX,XX @@ void arm_cpu_record_sigsegv(CPUState *cs, vaddr addr, | 55 | |
164 | * We report both ESR and FAR to signal handlers. | 56 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, |
165 | * For now, it's easiest to deliver the fault normally. | 57 | AArch64Insn insn; |
166 | */ | 58 | |
167 | - cpu_restore_state(cs, ra, true); | 59 | if (rl == ah || (!const_bh && rl == bh)) { |
168 | + cpu_restore_state(cs, ra); | 60 | - rl = TCG_REG_TMP; |
169 | arm_deliver_fault(cpu, addr, access_type, MMU_USER_IDX, &fi); | 61 | + rl = TCG_REG_TMP0; |
170 | } | 62 | } |
171 | 63 | ||
172 | diff --git a/target/cris/helper.c b/target/cris/helper.c | 64 | if (const_bl) { |
173 | index XXXXXXX..XXXXXXX 100644 | 65 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, |
174 | --- a/target/cris/helper.c | 66 | possibility of adding 0+const in the low part, and the |
175 | +++ b/target/cris/helper.c | 67 | immediate add instructions encode XSP not XZR. Don't try |
176 | @@ -XXX,XX +XXX,XX @@ bool cris_cpu_tlb_fill(CPUState *cs, vaddr address, int size, | 68 | anything more elaborate here than loading another zero. */ |
177 | cs->exception_index = EXCP_BUSFAULT; | 69 | - al = TCG_REG_TMP; |
178 | env->fault_vector = res.bf_vec; | 70 | + al = TCG_REG_TMP0; |
179 | if (retaddr) { | 71 | tcg_out_movi(s, ext, al, 0); |
180 | - if (cpu_restore_state(cs, retaddr, true)) { | ||
181 | + if (cpu_restore_state(cs, retaddr)) { | ||
182 | /* Evaluate flags after retranslation. */ | ||
183 | helper_top_evaluate_flags(env); | ||
184 | } | 72 | } |
185 | diff --git a/target/i386/tcg/sysemu/svm_helper.c b/target/i386/tcg/sysemu/svm_helper.c | 73 | tcg_out_insn_3401(s, insn, ext, rl, al, bl); |
186 | index XXXXXXX..XXXXXXX 100644 | 74 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, |
187 | --- a/target/i386/tcg/sysemu/svm_helper.c | ||
188 | +++ b/target/i386/tcg/sysemu/svm_helper.c | ||
189 | @@ -XXX,XX +XXX,XX @@ void cpu_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1, | ||
190 | { | 75 | { |
191 | CPUState *cs = env_cpu(env); | 76 | TCGReg a1 = a0; |
192 | 77 | if (is_ctz) { | |
193 | - cpu_restore_state(cs, retaddr, true); | 78 | - a1 = TCG_REG_TMP; |
194 | + cpu_restore_state(cs, retaddr); | 79 | + a1 = TCG_REG_TMP0; |
195 | 80 | tcg_out_insn(s, 3507, RBIT, ext, a1, a0); | |
196 | qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmexit(%08x, %016" PRIx64 ", %016" | 81 | } |
197 | PRIx64 ", " TARGET_FMT_lx ")!\n", | 82 | if (const_b && b == (ext ? 64 : 32)) { |
198 | diff --git a/target/m68k/op_helper.c b/target/m68k/op_helper.c | 83 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, |
199 | index XXXXXXX..XXXXXXX 100644 | 84 | AArch64Insn sel = I3506_CSEL; |
200 | --- a/target/m68k/op_helper.c | 85 | |
201 | +++ b/target/m68k/op_helper.c | 86 | tcg_out_cmp(s, ext, a0, 0, 1); |
202 | @@ -XXX,XX +XXX,XX @@ void m68k_cpu_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr, | 87 | - tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1); |
203 | M68kCPU *cpu = M68K_CPU(cs); | 88 | + tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1); |
204 | CPUM68KState *env = &cpu->env; | 89 | |
205 | 90 | if (const_b) { | |
206 | - cpu_restore_state(cs, retaddr, true); | 91 | if (b == -1) { |
207 | + cpu_restore_state(cs, retaddr); | 92 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d, |
208 | 93 | b = d; | |
209 | if (m68k_feature(env, M68K_FEATURE_M68040)) { | 94 | } |
210 | env->mmu.mmusr = 0; | 95 | } |
211 | @@ -XXX,XX +XXX,XX @@ raise_exception_format2(CPUM68KState *env, int tt, int ilen, uintptr_t raddr) | 96 | - tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE); |
212 | cs->exception_index = tt; | 97 | + tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE); |
213 | |||
214 | /* Recover PC and CC_OP for the beginning of the insn. */ | ||
215 | - cpu_restore_state(cs, raddr, true); | ||
216 | + cpu_restore_state(cs, raddr); | ||
217 | |||
218 | /* Flags are current in env->cc_*, or are undefined. */ | ||
219 | env->cc_op = CC_OP_FLAGS; | ||
220 | diff --git a/target/microblaze/helper.c b/target/microblaze/helper.c | ||
221 | index XXXXXXX..XXXXXXX 100644 | ||
222 | --- a/target/microblaze/helper.c | ||
223 | +++ b/target/microblaze/helper.c | ||
224 | @@ -XXX,XX +XXX,XX @@ void mb_cpu_do_unaligned_access(CPUState *cs, vaddr addr, | ||
225 | uint32_t esr, iflags; | ||
226 | |||
227 | /* Recover the pc and iflags from the corresponding insn_start. */ | ||
228 | - cpu_restore_state(cs, retaddr, true); | ||
229 | + cpu_restore_state(cs, retaddr); | ||
230 | iflags = cpu->env.iflags; | ||
231 | |||
232 | qemu_log_mask(CPU_LOG_INT, | ||
233 | diff --git a/target/nios2/op_helper.c b/target/nios2/op_helper.c | ||
234 | index XXXXXXX..XXXXXXX 100644 | ||
235 | --- a/target/nios2/op_helper.c | ||
236 | +++ b/target/nios2/op_helper.c | ||
237 | @@ -XXX,XX +XXX,XX @@ void nios2_cpu_loop_exit_advance(CPUNios2State *env, uintptr_t retaddr) | ||
238 | * Do this here, rather than in restore_state_to_opc(), | ||
239 | * lest we affect QEMU internal exceptions, like EXCP_DEBUG. | ||
240 | */ | ||
241 | - cpu_restore_state(cs, retaddr, true); | ||
242 | + cpu_restore_state(cs, retaddr); | ||
243 | env->pc += 4; | ||
244 | cpu_loop_exit(cs); | ||
245 | } | ||
246 | diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c | ||
247 | index XXXXXXX..XXXXXXX 100644 | ||
248 | --- a/target/openrisc/sys_helper.c | ||
249 | +++ b/target/openrisc/sys_helper.c | ||
250 | @@ -XXX,XX +XXX,XX @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb) | ||
251 | break; | ||
252 | |||
253 | case TO_SPR(0, 16): /* NPC */ | ||
254 | - cpu_restore_state(cs, GETPC(), true); | ||
255 | + cpu_restore_state(cs, GETPC()); | ||
256 | /* ??? Mirror or1ksim in not trashing delayed branch state | ||
257 | when "jumping" to the current instruction. */ | ||
258 | if (env->pc != rb) { | ||
259 | @@ -XXX,XX +XXX,XX @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb) | ||
260 | case TO_SPR(8, 0): /* PMR */ | ||
261 | env->pmr = rb; | ||
262 | if (env->pmr & PMR_DME || env->pmr & PMR_SME) { | ||
263 | - cpu_restore_state(cs, GETPC(), true); | ||
264 | + cpu_restore_state(cs, GETPC()); | ||
265 | env->pc += 4; | ||
266 | cs->halted = 1; | ||
267 | raise_exception(cpu, EXCP_HALTED); | ||
268 | diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c | ||
269 | index XXXXXXX..XXXXXXX 100644 | ||
270 | --- a/target/ppc/excp_helper.c | ||
271 | +++ b/target/ppc/excp_helper.c | ||
272 | @@ -XXX,XX +XXX,XX @@ void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, | ||
273 | uint32_t insn; | ||
274 | |||
275 | /* Restore state and reload the insn we executed, for filling in DSISR. */ | ||
276 | - cpu_restore_state(cs, retaddr, true); | ||
277 | + cpu_restore_state(cs, retaddr); | ||
278 | insn = cpu_ldl_code(env, env->nip); | ||
279 | |||
280 | switch (env->mmu_model) { | ||
281 | diff --git a/target/s390x/tcg/excp_helper.c b/target/s390x/tcg/excp_helper.c | ||
282 | index XXXXXXX..XXXXXXX 100644 | ||
283 | --- a/target/s390x/tcg/excp_helper.c | ||
284 | +++ b/target/s390x/tcg/excp_helper.c | ||
285 | @@ -XXX,XX +XXX,XX @@ G_NORETURN void tcg_s390_program_interrupt(CPUS390XState *env, | ||
286 | { | ||
287 | CPUState *cs = env_cpu(env); | ||
288 | |||
289 | - cpu_restore_state(cs, ra, true); | ||
290 | + cpu_restore_state(cs, ra); | ||
291 | qemu_log_mask(CPU_LOG_INT, "program interrupt at %#" PRIx64 "\n", | ||
292 | env->psw.addr); | ||
293 | trigger_pgm_exception(env, code); | ||
294 | diff --git a/target/tricore/op_helper.c b/target/tricore/op_helper.c | ||
295 | index XXXXXXX..XXXXXXX 100644 | ||
296 | --- a/target/tricore/op_helper.c | ||
297 | +++ b/target/tricore/op_helper.c | ||
298 | @@ -XXX,XX +XXX,XX @@ void raise_exception_sync_internal(CPUTriCoreState *env, uint32_t class, int tin | ||
299 | { | ||
300 | CPUState *cs = env_cpu(env); | ||
301 | /* in case we come from a helper-call we need to restore the PC */ | ||
302 | - cpu_restore_state(cs, pc, true); | ||
303 | + cpu_restore_state(cs, pc); | ||
304 | |||
305 | /* Tin is loaded into d[15] */ | ||
306 | env->gpr_d[15] = tin; | ||
307 | diff --git a/target/xtensa/helper.c b/target/xtensa/helper.c | ||
308 | index XXXXXXX..XXXXXXX 100644 | ||
309 | --- a/target/xtensa/helper.c | ||
310 | +++ b/target/xtensa/helper.c | ||
311 | @@ -XXX,XX +XXX,XX @@ void xtensa_cpu_do_unaligned_access(CPUState *cs, | ||
312 | |||
313 | assert(xtensa_option_enabled(env->config, | ||
314 | XTENSA_OPTION_UNALIGNED_EXCEPTION)); | ||
315 | - cpu_restore_state(CPU(cpu), retaddr, true); | ||
316 | + cpu_restore_state(CPU(cpu), retaddr); | ||
317 | HELPER(exception_cause_vaddr)(env, | ||
318 | env->pc, LOAD_STORE_ALIGNMENT_CAUSE, | ||
319 | addr); | ||
320 | @@ -XXX,XX +XXX,XX @@ bool xtensa_cpu_tlb_fill(CPUState *cs, vaddr address, int size, | ||
321 | } else if (probe) { | ||
322 | return false; | ||
323 | } else { | ||
324 | - cpu_restore_state(cs, retaddr, true); | ||
325 | + cpu_restore_state(cs, retaddr); | ||
326 | HELPER(exception_cause_vaddr)(env, env->pc, ret, address); | ||
327 | } | 98 | } |
328 | } | 99 | } |
329 | @@ -XXX,XX +XXX,XX @@ void xtensa_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr, | 100 | |
330 | XtensaCPU *cpu = XTENSA_CPU(cs); | 101 | @@ -XXX,XX +XXX,XX @@ bool tcg_target_has_memory_bswap(MemOp memop) |
331 | CPUXtensaState *env = &cpu->env; | 102 | } |
332 | 103 | ||
333 | - cpu_restore_state(cs, retaddr, true); | 104 | static const TCGLdstHelperParam ldst_helper_param = { |
334 | + cpu_restore_state(cs, retaddr); | 105 | - .ntmp = 1, .tmp = { TCG_REG_TMP } |
335 | HELPER(exception_cause_vaddr)(env, env->pc, | 106 | + .ntmp = 1, .tmp = { TCG_REG_TMP0 } |
336 | access_type == MMU_INST_FETCH ? | 107 | }; |
337 | INSTR_PIF_ADDR_ERROR_CAUSE : | 108 | |
109 | static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
110 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_tb(TCGContext *s, int which) | ||
111 | |||
112 | set_jmp_insn_offset(s, which); | ||
113 | tcg_out32(s, I3206_B); | ||
114 | - tcg_out_insn(s, 3207, BR, TCG_REG_TMP); | ||
115 | + tcg_out_insn(s, 3207, BR, TCG_REG_TMP0); | ||
116 | set_jmp_reset_offset(s, which); | ||
117 | } | ||
118 | |||
119 | @@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, | ||
120 | ptrdiff_t i_offset = i_addr - jmp_rx; | ||
121 | |||
122 | /* Note that we asserted this in range in tcg_out_goto_tb. */ | ||
123 | - insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2); | ||
124 | + insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2); | ||
125 | } | ||
126 | qatomic_set((uint32_t *)jmp_rw, insn); | ||
127 | flush_idcache_range(jmp_rx, jmp_rw, 4); | ||
128 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
129 | |||
130 | case INDEX_op_rem_i64: | ||
131 | case INDEX_op_rem_i32: | ||
132 | - tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2); | ||
133 | - tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); | ||
134 | + tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2); | ||
135 | + tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1); | ||
136 | break; | ||
137 | case INDEX_op_remu_i64: | ||
138 | case INDEX_op_remu_i32: | ||
139 | - tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2); | ||
140 | - tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1); | ||
141 | + tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2); | ||
142 | + tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1); | ||
143 | break; | ||
144 | |||
145 | case INDEX_op_shl_i64: | ||
146 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
147 | if (c2) { | ||
148 | tcg_out_rotl(s, ext, a0, a1, a2); | ||
149 | } else { | ||
150 | - tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2); | ||
151 | - tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP); | ||
152 | + tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2); | ||
153 | + tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0); | ||
154 | } | ||
155 | break; | ||
156 | |||
157 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | ||
158 | break; | ||
159 | } | ||
160 | } | ||
161 | - tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0); | ||
162 | - a2 = TCG_VEC_TMP; | ||
163 | + tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0); | ||
164 | + a2 = TCG_VEC_TMP0; | ||
165 | } | ||
166 | if (is_scalar) { | ||
167 | insn = cmp_scalar_insn[cond]; | ||
168 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
169 | s->reserved_regs = 0; | ||
170 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); | ||
171 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); | ||
172 | - tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); | ||
173 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ | ||
174 | - tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP); | ||
175 | + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0); | ||
176 | + tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0); | ||
177 | } | ||
178 | |||
179 | /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ | ||
338 | -- | 180 | -- |
339 | 2.34.1 | 181 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
3 | --- | ||
4 | tcg/aarch64/tcg-target.c.inc | 9 +++++++-- | ||
5 | 1 file changed, 7 insertions(+), 2 deletions(-) | ||
1 | 6 | ||
7 | diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc | ||
8 | index XXXXXXX..XXXXXXX 100644 | ||
9 | --- a/tcg/aarch64/tcg-target.c.inc | ||
10 | +++ b/tcg/aarch64/tcg-target.c.inc | ||
11 | @@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = { | ||
12 | |||
13 | TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, | ||
14 | TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, | ||
15 | - TCG_REG_X16, TCG_REG_X17, | ||
16 | |||
17 | TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, | ||
18 | TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, | ||
19 | |||
20 | + /* X16 reserved as temporary */ | ||
21 | + /* X17 reserved as temporary */ | ||
22 | /* X18 reserved by system */ | ||
23 | /* X19 reserved for AREG0 */ | ||
24 | /* X29 reserved as fp */ | ||
25 | @@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) | ||
26 | return TCG_REG_X0 + slot; | ||
27 | } | ||
28 | |||
29 | -#define TCG_REG_TMP0 TCG_REG_X30 | ||
30 | +#define TCG_REG_TMP0 TCG_REG_X16 | ||
31 | +#define TCG_REG_TMP1 TCG_REG_X17 | ||
32 | +#define TCG_REG_TMP2 TCG_REG_X30 | ||
33 | #define TCG_VEC_TMP0 TCG_REG_V31 | ||
34 | |||
35 | #ifndef CONFIG_SOFTMMU | ||
36 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
37 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP); | ||
38 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */ | ||
39 | tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0); | ||
40 | + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); | ||
41 | + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2); | ||
42 | tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0); | ||
43 | } | ||
44 | |||
45 | -- | ||
46 | 2.34.1 | diff view generated by jsdifflib |
1 | With sparc64 we need not distinguish between registers that | 1 | Adjust the softmmu tlb to use TMP[0-2], not any of the normally available |
---|---|---|---|
2 | can hold 32-bit values and those that can hold 64-bit values. | 2 | registers. Since we handle overlap between inputs and helper arguments, |
3 | we can allow any allocatable reg. | ||
3 | 4 | ||
4 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | --- | 7 | --- |
7 | tcg/sparc64/tcg-target-con-set.h | 16 +---- | 8 | tcg/aarch64/tcg-target-con-set.h | 2 -- |
8 | tcg/sparc64/tcg-target-con-str.h | 3 - | 9 | tcg/aarch64/tcg-target-con-str.h | 1 - |
9 | tcg/sparc64/tcg-target.c.inc | 109 ++++++++++++------------------- | 10 | tcg/aarch64/tcg-target.c.inc | 45 ++++++++++++++------------------ |
10 | 3 files changed, 44 insertions(+), 84 deletions(-) | 11 | 3 files changed, 19 insertions(+), 29 deletions(-) |
11 | 12 | ||
12 | diff --git a/tcg/sparc64/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h | 13 | diff --git a/tcg/aarch64/tcg-target-con-set.h b/tcg/aarch64/tcg-target-con-set.h |
13 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/tcg/sparc64/tcg-target-con-set.h | 15 | --- a/tcg/aarch64/tcg-target-con-set.h |
15 | +++ b/tcg/sparc64/tcg-target-con-set.h | 16 | +++ b/tcg/aarch64/tcg-target-con-set.h |
16 | @@ -XXX,XX +XXX,XX @@ | 17 | @@ -XXX,XX +XXX,XX @@ |
18 | * tcg-target-con-str.h; the constraint combination is inclusive or. | ||
17 | */ | 19 | */ |
18 | C_O0_I1(r) | 20 | C_O0_I1(r) |
21 | -C_O0_I2(lZ, l) | ||
22 | C_O0_I2(r, rA) | ||
19 | C_O0_I2(rZ, r) | 23 | C_O0_I2(rZ, r) |
20 | -C_O0_I2(RZ, r) | 24 | C_O0_I2(w, r) |
21 | C_O0_I2(rZ, rJ) | 25 | -C_O1_I1(r, l) |
22 | -C_O0_I2(RZ, RJ) | ||
23 | -C_O0_I2(sZ, A) | ||
24 | -C_O0_I2(SZ, A) | ||
25 | -C_O1_I1(r, A) | ||
26 | -C_O1_I1(R, A) | ||
27 | +C_O0_I2(sZ, s) | ||
28 | +C_O1_I1(r, s) | ||
29 | C_O1_I1(r, r) | 26 | C_O1_I1(r, r) |
30 | -C_O1_I1(r, R) | 27 | C_O1_I1(w, r) |
31 | -C_O1_I1(R, r) | 28 | C_O1_I1(w, w) |
32 | -C_O1_I1(R, R) | 29 | diff --git a/tcg/aarch64/tcg-target-con-str.h b/tcg/aarch64/tcg-target-con-str.h |
33 | -C_O1_I2(R, R, R) | ||
34 | +C_O1_I2(r, r, r) | ||
35 | C_O1_I2(r, rZ, rJ) | ||
36 | -C_O1_I2(R, RZ, RJ) | ||
37 | C_O1_I4(r, rZ, rJ, rI, 0) | ||
38 | -C_O1_I4(R, RZ, RJ, RI, 0) | ||
39 | C_O2_I2(r, r, rZ, rJ) | ||
40 | -C_O2_I4(R, R, RZ, RZ, RJ, RI) | ||
41 | C_O2_I4(r, r, rZ, rZ, rJ, rJ) | ||
42 | diff --git a/tcg/sparc64/tcg-target-con-str.h b/tcg/sparc64/tcg-target-con-str.h | ||
43 | index XXXXXXX..XXXXXXX 100644 | 30 | index XXXXXXX..XXXXXXX 100644 |
44 | --- a/tcg/sparc64/tcg-target-con-str.h | 31 | --- a/tcg/aarch64/tcg-target-con-str.h |
45 | +++ b/tcg/sparc64/tcg-target-con-str.h | 32 | +++ b/tcg/aarch64/tcg-target-con-str.h |
46 | @@ -XXX,XX +XXX,XX @@ | 33 | @@ -XXX,XX +XXX,XX @@ |
47 | * REGS(letter, register_mask) | 34 | * REGS(letter, register_mask) |
48 | */ | 35 | */ |
49 | REGS('r', ALL_GENERAL_REGS) | 36 | REGS('r', ALL_GENERAL_REGS) |
50 | -REGS('R', ALL_GENERAL_REGS64) | 37 | -REGS('l', ALL_QLDST_REGS) |
51 | REGS('s', ALL_QLDST_REGS) | 38 | REGS('w', ALL_VECTOR_REGS) |
52 | -REGS('S', ALL_QLDST_REGS64) | ||
53 | -REGS('A', TARGET_LONG_BITS == 64 ? ALL_QLDST_REGS64 : ALL_QLDST_REGS) | ||
54 | 39 | ||
55 | /* | 40 | /* |
56 | * Define constraint letters for constants: | 41 | diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc |
57 | diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc | ||
58 | index XXXXXXX..XXXXXXX 100644 | 42 | index XXXXXXX..XXXXXXX 100644 |
59 | --- a/tcg/sparc64/tcg-target.c.inc | 43 | --- a/tcg/aarch64/tcg-target.c.inc |
60 | +++ b/tcg/sparc64/tcg-target.c.inc | 44 | +++ b/tcg/aarch64/tcg-target.c.inc |
61 | @@ -XXX,XX +XXX,XX @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { | 45 | @@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, |
46 | #define ALL_GENERAL_REGS 0xffffffffu | ||
47 | #define ALL_VECTOR_REGS 0xffffffff00000000ull | ||
48 | |||
49 | -#ifdef CONFIG_SOFTMMU | ||
50 | -#define ALL_QLDST_REGS \ | ||
51 | - (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \ | ||
52 | - (1 << TCG_REG_X2) | (1 << TCG_REG_X3))) | ||
53 | -#else | ||
54 | -#define ALL_QLDST_REGS ALL_GENERAL_REGS | ||
55 | -#endif | ||
56 | - | ||
57 | /* Match a constant valid for addition (12-bit, optionally shifted). */ | ||
58 | static inline bool is_aimm(uint64_t val) | ||
59 | { | ||
60 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, | ||
61 | unsigned s_bits = opc & MO_SIZE; | ||
62 | unsigned s_mask = (1u << s_bits) - 1; | ||
63 | unsigned mem_index = get_mmuidx(oi); | ||
64 | - TCGReg x3; | ||
65 | + TCGReg addr_adj; | ||
66 | TCGType mask_type; | ||
67 | uint64_t compare_mask; | ||
68 | |||
69 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, | ||
70 | mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32 | ||
71 | ? TCG_TYPE_I64 : TCG_TYPE_I32); | ||
72 | |||
73 | - /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */ | ||
74 | + /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {tmp0,tmp1}. */ | ||
75 | QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0); | ||
76 | QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512); | ||
77 | QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); | ||
78 | QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8); | ||
79 | - tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0, | ||
80 | + tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0, | ||
81 | TLB_MASK_TABLE_OFS(mem_index), 1, 0); | ||
82 | |||
83 | /* Extract the TLB index from the address into X0. */ | ||
84 | tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64, | ||
85 | - TCG_REG_X0, TCG_REG_X0, addr_reg, | ||
86 | + TCG_REG_TMP0, TCG_REG_TMP0, addr_reg, | ||
87 | s->page_bits - CPU_TLB_ENTRY_BITS); | ||
88 | |||
89 | - /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */ | ||
90 | - tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0); | ||
91 | + /* Add the tlb_table pointer, forming the CPUTLBEntry address in TMP1. */ | ||
92 | + tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0); | ||
93 | |||
94 | - /* Load the tlb comparator into X0, and the fast path addend into X1. */ | ||
95 | - tcg_out_ld(s, addr_type, TCG_REG_X0, TCG_REG_X1, | ||
96 | + /* Load the tlb comparator into TMP0, and the fast path addend into TMP1. */ | ||
97 | + tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1, | ||
98 | is_ld ? offsetof(CPUTLBEntry, addr_read) | ||
99 | : offsetof(CPUTLBEntry, addr_write)); | ||
100 | - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1, | ||
101 | + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, | ||
102 | offsetof(CPUTLBEntry, addend)); | ||
103 | |||
104 | /* | ||
105 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, | ||
106 | * cross pages using the address of the last byte of the access. | ||
107 | */ | ||
108 | if (a_mask >= s_mask) { | ||
109 | - x3 = addr_reg; | ||
110 | + addr_adj = addr_reg; | ||
111 | } else { | ||
112 | + addr_adj = TCG_REG_TMP2; | ||
113 | tcg_out_insn(s, 3401, ADDI, addr_type, | ||
114 | - TCG_REG_X3, addr_reg, s_mask - a_mask); | ||
115 | - x3 = TCG_REG_X3; | ||
116 | + addr_adj, addr_reg, s_mask - a_mask); | ||
117 | } | ||
118 | compare_mask = (uint64_t)s->page_mask | a_mask; | ||
119 | |||
120 | - /* Store the page mask part of the address into X3. */ | ||
121 | - tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask); | ||
122 | + /* Store the page mask part of the address into TMP2. */ | ||
123 | + tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2, | ||
124 | + addr_adj, compare_mask); | ||
125 | |||
126 | /* Perform the address comparison. */ | ||
127 | - tcg_out_cmp(s, addr_type, TCG_REG_X0, TCG_REG_X3, 0); | ||
128 | + tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0); | ||
129 | |||
130 | /* If not equal, we jump to the slow path. */ | ||
131 | ldst->label_ptr[0] = s->code_ptr; | ||
132 | tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); | ||
133 | |||
134 | - h->base = TCG_REG_X1, | ||
135 | + h->base = TCG_REG_TMP1; | ||
136 | h->index = addr_reg; | ||
137 | h->index_ext = addr_type; | ||
62 | #else | 138 | #else |
63 | #define SOFTMMU_RESERVE_REGS 0 | ||
64 | #endif | ||
65 | - | ||
66 | -/* | ||
67 | - * Note that sparcv8plus can only hold 64 bit quantities in %g and %o | ||
68 | - * registers. These are saved manually by the kernel in full 64-bit | ||
69 | - * slots. The %i and %l registers are saved by the register window | ||
70 | - * mechanism, which only allocates space for 32 bits. Given that this | ||
71 | - * window spill/fill can happen on any signal, we must consider the | ||
72 | - * high bits of the %i and %l registers garbage at all times. | ||
73 | - */ | ||
74 | #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) | ||
75 | -# define ALL_GENERAL_REGS64 ALL_GENERAL_REGS | ||
76 | #define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) | ||
77 | -#define ALL_QLDST_REGS64 (ALL_GENERAL_REGS64 & ~SOFTMMU_RESERVE_REGS) | ||
78 | |||
79 | /* Define some temporary registers. T2 is used for constant generation. */ | ||
80 | #define TCG_REG_T1 TCG_REG_G1 | ||
81 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | 139 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) |
82 | return C_O0_I1(r); | 140 | case INDEX_op_qemu_ld_a64_i32: |
83 | 141 | case INDEX_op_qemu_ld_a32_i64: | |
84 | case INDEX_op_ld8u_i32: | 142 | case INDEX_op_qemu_ld_a64_i64: |
85 | + case INDEX_op_ld8u_i64: | 143 | - return C_O1_I1(r, l); |
86 | case INDEX_op_ld8s_i32: | 144 | + return C_O1_I1(r, r); |
87 | + case INDEX_op_ld8s_i64: | 145 | case INDEX_op_qemu_st_a32_i32: |
88 | case INDEX_op_ld16u_i32: | 146 | case INDEX_op_qemu_st_a64_i32: |
89 | + case INDEX_op_ld16u_i64: | 147 | case INDEX_op_qemu_st_a32_i64: |
90 | case INDEX_op_ld16s_i32: | 148 | case INDEX_op_qemu_st_a64_i64: |
91 | + case INDEX_op_ld16s_i64: | 149 | - return C_O0_I2(lZ, l); |
92 | case INDEX_op_ld_i32: | 150 | + return C_O0_I2(rZ, r); |
93 | + case INDEX_op_ld32u_i64: | 151 | |
94 | + case INDEX_op_ld32s_i64: | 152 | case INDEX_op_deposit_i32: |
95 | + case INDEX_op_ld_i64: | 153 | case INDEX_op_deposit_i64: |
96 | case INDEX_op_neg_i32: | ||
97 | + case INDEX_op_neg_i64: | ||
98 | case INDEX_op_not_i32: | ||
99 | + case INDEX_op_not_i64: | ||
100 | + case INDEX_op_ext32s_i64: | ||
101 | + case INDEX_op_ext32u_i64: | ||
102 | + case INDEX_op_ext_i32_i64: | ||
103 | + case INDEX_op_extu_i32_i64: | ||
104 | + case INDEX_op_extrl_i64_i32: | ||
105 | + case INDEX_op_extrh_i64_i32: | ||
106 | return C_O1_I1(r, r); | ||
107 | |||
108 | case INDEX_op_st8_i32: | ||
109 | + case INDEX_op_st8_i64: | ||
110 | case INDEX_op_st16_i32: | ||
111 | + case INDEX_op_st16_i64: | ||
112 | case INDEX_op_st_i32: | ||
113 | + case INDEX_op_st32_i64: | ||
114 | + case INDEX_op_st_i64: | ||
115 | return C_O0_I2(rZ, r); | ||
116 | |||
117 | case INDEX_op_add_i32: | ||
118 | + case INDEX_op_add_i64: | ||
119 | case INDEX_op_mul_i32: | ||
120 | + case INDEX_op_mul_i64: | ||
121 | case INDEX_op_div_i32: | ||
122 | + case INDEX_op_div_i64: | ||
123 | case INDEX_op_divu_i32: | ||
124 | + case INDEX_op_divu_i64: | ||
125 | case INDEX_op_sub_i32: | ||
126 | + case INDEX_op_sub_i64: | ||
127 | case INDEX_op_and_i32: | ||
128 | + case INDEX_op_and_i64: | ||
129 | case INDEX_op_andc_i32: | ||
130 | + case INDEX_op_andc_i64: | ||
131 | case INDEX_op_or_i32: | ||
132 | + case INDEX_op_or_i64: | ||
133 | case INDEX_op_orc_i32: | ||
134 | + case INDEX_op_orc_i64: | ||
135 | case INDEX_op_xor_i32: | ||
136 | + case INDEX_op_xor_i64: | ||
137 | case INDEX_op_shl_i32: | ||
138 | + case INDEX_op_shl_i64: | ||
139 | case INDEX_op_shr_i32: | ||
140 | + case INDEX_op_shr_i64: | ||
141 | case INDEX_op_sar_i32: | ||
142 | + case INDEX_op_sar_i64: | ||
143 | case INDEX_op_setcond_i32: | ||
144 | + case INDEX_op_setcond_i64: | ||
145 | return C_O1_I2(r, rZ, rJ); | ||
146 | |||
147 | case INDEX_op_brcond_i32: | ||
148 | + case INDEX_op_brcond_i64: | ||
149 | return C_O0_I2(rZ, rJ); | ||
150 | case INDEX_op_movcond_i32: | ||
151 | + case INDEX_op_movcond_i64: | ||
152 | return C_O1_I4(r, rZ, rJ, rI, 0); | ||
153 | case INDEX_op_add2_i32: | ||
154 | + case INDEX_op_add2_i64: | ||
155 | case INDEX_op_sub2_i32: | ||
156 | + case INDEX_op_sub2_i64: | ||
157 | return C_O2_I4(r, r, rZ, rZ, rJ, rJ); | ||
158 | case INDEX_op_mulu2_i32: | ||
159 | case INDEX_op_muls2_i32: | ||
160 | return C_O2_I2(r, r, rZ, rJ); | ||
161 | - | ||
162 | - case INDEX_op_ld8u_i64: | ||
163 | - case INDEX_op_ld8s_i64: | ||
164 | - case INDEX_op_ld16u_i64: | ||
165 | - case INDEX_op_ld16s_i64: | ||
166 | - case INDEX_op_ld32u_i64: | ||
167 | - case INDEX_op_ld32s_i64: | ||
168 | - case INDEX_op_ld_i64: | ||
169 | - case INDEX_op_ext_i32_i64: | ||
170 | - case INDEX_op_extu_i32_i64: | ||
171 | - return C_O1_I1(R, r); | ||
172 | - | ||
173 | - case INDEX_op_st8_i64: | ||
174 | - case INDEX_op_st16_i64: | ||
175 | - case INDEX_op_st32_i64: | ||
176 | - case INDEX_op_st_i64: | ||
177 | - return C_O0_I2(RZ, r); | ||
178 | - | ||
179 | - case INDEX_op_add_i64: | ||
180 | - case INDEX_op_mul_i64: | ||
181 | - case INDEX_op_div_i64: | ||
182 | - case INDEX_op_divu_i64: | ||
183 | - case INDEX_op_sub_i64: | ||
184 | - case INDEX_op_and_i64: | ||
185 | - case INDEX_op_andc_i64: | ||
186 | - case INDEX_op_or_i64: | ||
187 | - case INDEX_op_orc_i64: | ||
188 | - case INDEX_op_xor_i64: | ||
189 | - case INDEX_op_shl_i64: | ||
190 | - case INDEX_op_shr_i64: | ||
191 | - case INDEX_op_sar_i64: | ||
192 | - case INDEX_op_setcond_i64: | ||
193 | - return C_O1_I2(R, RZ, RJ); | ||
194 | - | ||
195 | - case INDEX_op_neg_i64: | ||
196 | - case INDEX_op_not_i64: | ||
197 | - case INDEX_op_ext32s_i64: | ||
198 | - case INDEX_op_ext32u_i64: | ||
199 | - return C_O1_I1(R, R); | ||
200 | - | ||
201 | - case INDEX_op_extrl_i64_i32: | ||
202 | - case INDEX_op_extrh_i64_i32: | ||
203 | - return C_O1_I1(r, R); | ||
204 | - | ||
205 | - case INDEX_op_brcond_i64: | ||
206 | - return C_O0_I2(RZ, RJ); | ||
207 | - case INDEX_op_movcond_i64: | ||
208 | - return C_O1_I4(R, RZ, RJ, RI, 0); | ||
209 | - case INDEX_op_add2_i64: | ||
210 | - case INDEX_op_sub2_i64: | ||
211 | - return C_O2_I4(R, R, RZ, RZ, RJ, RI); | ||
212 | case INDEX_op_muluh_i64: | ||
213 | - return C_O1_I2(R, R, R); | ||
214 | + return C_O1_I2(r, r, r); | ||
215 | |||
216 | case INDEX_op_qemu_ld_i32: | ||
217 | - return C_O1_I1(r, A); | ||
218 | case INDEX_op_qemu_ld_i64: | ||
219 | - return C_O1_I1(R, A); | ||
220 | + return C_O1_I1(r, s); | ||
221 | case INDEX_op_qemu_st_i32: | ||
222 | - return C_O0_I2(sZ, A); | ||
223 | case INDEX_op_qemu_st_i64: | ||
224 | - return C_O0_I2(SZ, A); | ||
225 | + return C_O0_I2(sZ, s); | ||
226 | |||
227 | default: | ||
228 | g_assert_not_reached(); | ||
229 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s) | ||
230 | #endif | ||
231 | |||
232 | tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS; | ||
233 | - tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS64; | ||
234 | + tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS; | ||
235 | |||
236 | tcg_target_call_clobber_regs = 0; | ||
237 | tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G1); | ||
238 | -- | 154 | -- |
239 | 2.34.1 | 155 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | With FEAT_LSE2, LDP/STP suffices. Without FEAT_LSE2, use LDXP+STXP | ||
2 | if 16-byte atomicity is required and LDP/STP otherwise. | ||
1 | 3 | ||
4 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | tcg/aarch64/tcg-target-con-set.h | 2 + | ||
8 | tcg/aarch64/tcg-target.h | 11 ++- | ||
9 | tcg/aarch64/tcg-target.c.inc | 141 ++++++++++++++++++++++++++++++- | ||
10 | 3 files changed, 151 insertions(+), 3 deletions(-) | ||
11 | |||
12 | diff --git a/tcg/aarch64/tcg-target-con-set.h b/tcg/aarch64/tcg-target-con-set.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/tcg/aarch64/tcg-target-con-set.h | ||
15 | +++ b/tcg/aarch64/tcg-target-con-set.h | ||
16 | @@ -XXX,XX +XXX,XX @@ C_O0_I1(r) | ||
17 | C_O0_I2(r, rA) | ||
18 | C_O0_I2(rZ, r) | ||
19 | C_O0_I2(w, r) | ||
20 | +C_O0_I3(rZ, rZ, r) | ||
21 | C_O1_I1(r, r) | ||
22 | C_O1_I1(w, r) | ||
23 | C_O1_I1(w, w) | ||
24 | @@ -XXX,XX +XXX,XX @@ C_O1_I2(w, w, wO) | ||
25 | C_O1_I2(w, w, wZ) | ||
26 | C_O1_I3(w, w, w, w) | ||
27 | C_O1_I4(r, r, rA, rZ, rZ) | ||
28 | +C_O2_I1(r, r, r) | ||
29 | C_O2_I4(r, r, rZ, rZ, rA, rMZ) | ||
30 | diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/tcg/aarch64/tcg-target.h | ||
33 | +++ b/tcg/aarch64/tcg-target.h | ||
34 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
35 | #define TCG_TARGET_HAS_muluh_i64 1 | ||
36 | #define TCG_TARGET_HAS_mulsh_i64 1 | ||
37 | |||
38 | -#define TCG_TARGET_HAS_qemu_ldst_i128 0 | ||
39 | +/* | ||
40 | + * Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load, | ||
41 | + * which requires writable pages. We must defer to the helper for user-only, | ||
42 | + * but in system mode all ram is writable for the host. | ||
43 | + */ | ||
44 | +#ifdef CONFIG_USER_ONLY | ||
45 | +#define TCG_TARGET_HAS_qemu_ldst_i128 have_lse2 | ||
46 | +#else | ||
47 | +#define TCG_TARGET_HAS_qemu_ldst_i128 1 | ||
48 | +#endif | ||
49 | |||
50 | #define TCG_TARGET_HAS_v64 1 | ||
51 | #define TCG_TARGET_HAS_v128 1 | ||
52 | diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc | ||
53 | index XXXXXXX..XXXXXXX 100644 | ||
54 | --- a/tcg/aarch64/tcg-target.c.inc | ||
55 | +++ b/tcg/aarch64/tcg-target.c.inc | ||
56 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
57 | I3305_LDR_v64 = 0x5c000000, | ||
58 | I3305_LDR_v128 = 0x9c000000, | ||
59 | |||
60 | + /* Load/store exclusive. */ | ||
61 | + I3306_LDXP = 0xc8600000, | ||
62 | + I3306_STXP = 0xc8200000, | ||
63 | + | ||
64 | /* Load/store register. Described here as 3.3.12, but the helper | ||
65 | that emits them can transform to 3.3.10 or 3.3.13. */ | ||
66 | I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30, | ||
67 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
68 | I3406_ADR = 0x10000000, | ||
69 | I3406_ADRP = 0x90000000, | ||
70 | |||
71 | + /* Add/subtract extended register instructions. */ | ||
72 | + I3501_ADD = 0x0b200000, | ||
73 | + | ||
74 | /* Add/subtract shifted register instructions (without a shift). */ | ||
75 | I3502_ADD = 0x0b000000, | ||
76 | I3502_ADDS = 0x2b000000, | ||
77 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, | ||
78 | tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt); | ||
79 | } | ||
80 | |||
81 | +static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs, | ||
82 | + TCGReg rt, TCGReg rt2, TCGReg rn) | ||
83 | +{ | ||
84 | + tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt); | ||
85 | +} | ||
86 | + | ||
87 | static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext, | ||
88 | TCGReg rt, int imm19) | ||
89 | { | ||
90 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn, | ||
91 | tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd); | ||
92 | } | ||
93 | |||
94 | +static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn, | ||
95 | + TCGType sf, TCGReg rd, TCGReg rn, | ||
96 | + TCGReg rm, int opt, int imm3) | ||
97 | +{ | ||
98 | + tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 | | ||
99 | + imm3 << 10 | rn << 5 | rd); | ||
100 | +} | ||
101 | + | ||
102 | /* This function is for both 3.5.2 (Add/Subtract shifted register), for | ||
103 | the rare occasion when we actually want to supply a shift amount. */ | ||
104 | static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn, | ||
105 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, | ||
106 | TCGType addr_type = s->addr_type; | ||
107 | TCGLabelQemuLdst *ldst = NULL; | ||
108 | MemOp opc = get_memop(oi); | ||
109 | + MemOp s_bits = opc & MO_SIZE; | ||
110 | unsigned a_mask; | ||
111 | |||
112 | h->aa = atom_and_align_for_opc(s, opc, | ||
113 | have_lse2 ? MO_ATOM_WITHIN16 | ||
114 | : MO_ATOM_IFALIGN, | ||
115 | - false); | ||
116 | + s_bits == MO_128); | ||
117 | a_mask = (1 << h->aa.align) - 1; | ||
118 | |||
119 | #ifdef CONFIG_SOFTMMU | ||
120 | - unsigned s_bits = opc & MO_SIZE; | ||
121 | unsigned s_mask = (1u << s_bits) - 1; | ||
122 | unsigned mem_index = get_mmuidx(oi); | ||
123 | TCGReg addr_adj; | ||
124 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, | ||
125 | } | ||
126 | } | ||
127 | |||
128 | +static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, | ||
129 | + TCGReg addr_reg, MemOpIdx oi, bool is_ld) | ||
130 | +{ | ||
131 | + TCGLabelQemuLdst *ldst; | ||
132 | + HostAddress h; | ||
133 | + TCGReg base; | ||
134 | + bool use_pair; | ||
135 | + | ||
136 | + ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld); | ||
137 | + | ||
138 | + /* Compose the final address, as LDP/STP have no indexing. */ | ||
139 | + if (h.index == TCG_REG_XZR) { | ||
140 | + base = h.base; | ||
141 | + } else { | ||
142 | + base = TCG_REG_TMP2; | ||
143 | + if (h.index_ext == TCG_TYPE_I32) { | ||
144 | + /* add base, base, index, uxtw */ | ||
145 | + tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base, | ||
146 | + h.base, h.index, MO_32, 0); | ||
147 | + } else { | ||
148 | + /* add base, base, index */ | ||
149 | + tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index); | ||
150 | + } | ||
151 | + } | ||
152 | + | ||
153 | + use_pair = h.aa.atom < MO_128 || have_lse2; | ||
154 | + | ||
155 | + if (!use_pair) { | ||
156 | + tcg_insn_unit *branch = NULL; | ||
157 | + TCGReg ll, lh, sl, sh; | ||
158 | + | ||
159 | + /* | ||
160 | + * If we have already checked for 16-byte alignment, that's all | ||
161 | + * we need. Otherwise we have determined that misaligned atomicity | ||
162 | + * may be handled with two 8-byte loads. | ||
163 | + */ | ||
164 | + if (h.aa.align < MO_128) { | ||
165 | + /* | ||
166 | + * TODO: align should be MO_64, so we only need test bit 3, | ||
167 | + * which means we could use TBNZ instead of ANDS+B_C. | ||
168 | + */ | ||
169 | + tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15); | ||
170 | + branch = s->code_ptr; | ||
171 | + tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0); | ||
172 | + use_pair = true; | ||
173 | + } | ||
174 | + | ||
175 | + if (is_ld) { | ||
176 | + /* | ||
177 | + * 16-byte atomicity without LSE2 requires LDXP+STXP loop: | ||
178 | + * ldxp lo, hi, [base] | ||
179 | + * stxp t0, lo, hi, [base] | ||
180 | + * cbnz t0, .-8 | ||
181 | + * Require no overlap between data{lo,hi} and base. | ||
182 | + */ | ||
183 | + if (datalo == base || datahi == base) { | ||
184 | + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base); | ||
185 | + base = TCG_REG_TMP2; | ||
186 | + } | ||
187 | + ll = sl = datalo; | ||
188 | + lh = sh = datahi; | ||
189 | + } else { | ||
190 | + /* | ||
191 | + * 16-byte atomicity without LSE2 requires LDXP+STXP loop: | ||
192 | + * 1: ldxp t0, t1, [base] | ||
193 | + * stxp t0, lo, hi, [base] | ||
194 | + * cbnz t0, 1b | ||
195 | + */ | ||
196 | + tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1); | ||
197 | + ll = TCG_REG_TMP0; | ||
198 | + lh = TCG_REG_TMP1; | ||
199 | + sl = datalo; | ||
200 | + sh = datahi; | ||
201 | + } | ||
202 | + | ||
203 | + tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base); | ||
204 | + tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base); | ||
205 | + tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2); | ||
206 | + | ||
207 | + if (use_pair) { | ||
208 | + /* "b .+8", branching across the one insn of use_pair. */ | ||
209 | + tcg_out_insn(s, 3206, B, 2); | ||
210 | + reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr)); | ||
211 | + } | ||
212 | + } | ||
213 | + | ||
214 | + if (use_pair) { | ||
215 | + if (is_ld) { | ||
216 | + tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0); | ||
217 | + } else { | ||
218 | + tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0); | ||
219 | + } | ||
220 | + } | ||
221 | + | ||
222 | + if (ldst) { | ||
223 | + ldst->type = TCG_TYPE_I128; | ||
224 | + ldst->datalo_reg = datalo; | ||
225 | + ldst->datahi_reg = datahi; | ||
226 | + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); | ||
227 | + } | ||
228 | +} | ||
229 | + | ||
230 | static const tcg_insn_unit *tb_ret_addr; | ||
231 | |||
232 | static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) | ||
233 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
234 | case INDEX_op_qemu_st_a64_i64: | ||
235 | tcg_out_qemu_st(s, REG0(0), a1, a2, ext); | ||
236 | break; | ||
237 | + case INDEX_op_qemu_ld_a32_i128: | ||
238 | + case INDEX_op_qemu_ld_a64_i128: | ||
239 | + tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true); | ||
240 | + break; | ||
241 | + case INDEX_op_qemu_st_a32_i128: | ||
242 | + case INDEX_op_qemu_st_a64_i128: | ||
243 | + tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false); | ||
244 | + break; | ||
245 | |||
246 | case INDEX_op_bswap64_i64: | ||
247 | tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1); | ||
248 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | ||
249 | case INDEX_op_qemu_ld_a32_i64: | ||
250 | case INDEX_op_qemu_ld_a64_i64: | ||
251 | return C_O1_I1(r, r); | ||
252 | + case INDEX_op_qemu_ld_a32_i128: | ||
253 | + case INDEX_op_qemu_ld_a64_i128: | ||
254 | + return C_O2_I1(r, r, r); | ||
255 | case INDEX_op_qemu_st_a32_i32: | ||
256 | case INDEX_op_qemu_st_a64_i32: | ||
257 | case INDEX_op_qemu_st_a32_i64: | ||
258 | case INDEX_op_qemu_st_a64_i64: | ||
259 | return C_O0_I2(rZ, r); | ||
260 | + case INDEX_op_qemu_st_a32_i128: | ||
261 | + case INDEX_op_qemu_st_a64_i128: | ||
262 | + return C_O0_I3(rZ, rZ, r); | ||
263 | |||
264 | case INDEX_op_deposit_i32: | ||
265 | case INDEX_op_deposit_i64: | ||
266 | -- | ||
267 | 2.34.1 | diff view generated by jsdifflib |
1 | The helpers for reset_rf, cli, sti, clac, stac are | 1 | Use LQ/STQ with ISA v2.07 when 16-byte atomicity is required. |
---|---|---|---|
2 | completely trivial; implement them inline. | 2 | Note that these instructions do not require 16-byte alignment. |
3 | 3 | ||
4 | Drop some nearby #if 0 code. | 4 | Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com> |
5 | |||
6 | Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> | ||
7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
9 | --- | 6 | --- |
10 | target/i386/helper.h | 5 ----- | 7 | tcg/ppc/tcg-target-con-set.h | 2 + |
11 | target/i386/tcg/cc_helper.c | 41 ------------------------------------- | 8 | tcg/ppc/tcg-target-con-str.h | 1 + |
12 | target/i386/tcg/translate.c | 30 ++++++++++++++++++++++----- | 9 | tcg/ppc/tcg-target.h | 3 +- |
13 | 3 files changed, 25 insertions(+), 51 deletions(-) | 10 | tcg/ppc/tcg-target.c.inc | 108 +++++++++++++++++++++++++++++++---- |
11 | 4 files changed, 101 insertions(+), 13 deletions(-) | ||
14 | 12 | ||
15 | diff --git a/target/i386/helper.h b/target/i386/helper.h | 13 | diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h |
16 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/target/i386/helper.h | 15 | --- a/tcg/ppc/tcg-target-con-set.h |
18 | +++ b/target/i386/helper.h | 16 | +++ b/tcg/ppc/tcg-target-con-set.h |
19 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_2(syscall, void, env, int) | 17 | @@ -XXX,XX +XXX,XX @@ C_O0_I2(r, r) |
20 | DEF_HELPER_2(sysret, void, env, int) | 18 | C_O0_I2(r, ri) |
19 | C_O0_I2(v, r) | ||
20 | C_O0_I3(r, r, r) | ||
21 | +C_O0_I3(o, m, r) | ||
22 | C_O0_I4(r, r, ri, ri) | ||
23 | C_O0_I4(r, r, r, r) | ||
24 | C_O1_I1(r, r) | ||
25 | @@ -XXX,XX +XXX,XX @@ C_O1_I3(v, v, v, v) | ||
26 | C_O1_I4(r, r, ri, rZ, rZ) | ||
27 | C_O1_I4(r, r, r, ri, ri) | ||
28 | C_O2_I1(r, r, r) | ||
29 | +C_O2_I1(o, m, r) | ||
30 | C_O2_I2(r, r, r, r) | ||
31 | C_O2_I4(r, r, rI, rZM, r, r) | ||
32 | C_O2_I4(r, r, r, r, rI, rZM) | ||
33 | diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/tcg/ppc/tcg-target-con-str.h | ||
36 | +++ b/tcg/ppc/tcg-target-con-str.h | ||
37 | @@ -XXX,XX +XXX,XX @@ | ||
38 | * REGS(letter, register_mask) | ||
39 | */ | ||
40 | REGS('r', ALL_GENERAL_REGS) | ||
41 | +REGS('o', ALL_GENERAL_REGS & 0xAAAAAAAAu) /* odd registers */ | ||
42 | REGS('v', ALL_VECTOR_REGS) | ||
43 | |||
44 | /* | ||
45 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/tcg/ppc/tcg-target.h | ||
48 | +++ b/tcg/ppc/tcg-target.h | ||
49 | @@ -XXX,XX +XXX,XX @@ extern bool have_vsx; | ||
50 | #define TCG_TARGET_HAS_mulsh_i64 1 | ||
21 | #endif | 51 | #endif |
22 | DEF_HELPER_FLAGS_2(pause, TCG_CALL_NO_WG, noreturn, env, int) | 52 | |
23 | -DEF_HELPER_1(reset_rf, void, env) | 53 | -#define TCG_TARGET_HAS_qemu_ldst_i128 0 |
24 | DEF_HELPER_FLAGS_3(raise_interrupt, TCG_CALL_NO_WG, noreturn, env, int, int) | 54 | +#define TCG_TARGET_HAS_qemu_ldst_i128 \ |
25 | DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, int) | 55 | + (TCG_TARGET_REG_BITS == 64 && have_isa_2_07) |
26 | -DEF_HELPER_1(cli, void, env) | 56 | |
27 | -DEF_HELPER_1(sti, void, env) | 57 | /* |
28 | -DEF_HELPER_1(clac, void, env) | 58 | * While technically Altivec could support V64, it has no 64-bit store |
29 | -DEF_HELPER_1(stac, void, env) | 59 | diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc |
30 | DEF_HELPER_3(boundw, void, env, tl, int) | 60 | index XXXXXXX..XXXXXXX 100644 |
31 | DEF_HELPER_3(boundl, void, env, tl, int) | 61 | --- a/tcg/ppc/tcg-target.c.inc |
32 | 62 | +++ b/tcg/ppc/tcg-target.c.inc | |
33 | diff --git a/target/i386/tcg/cc_helper.c b/target/i386/tcg/cc_helper.c | 63 | @@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) |
34 | index XXXXXXX..XXXXXXX 100644 | 64 | |
35 | --- a/target/i386/tcg/cc_helper.c | 65 | #define B OPCD( 18) |
36 | +++ b/target/i386/tcg/cc_helper.c | 66 | #define BC OPCD( 16) |
37 | @@ -XXX,XX +XXX,XX @@ void helper_clts(CPUX86State *env) | 67 | + |
38 | env->cr[0] &= ~CR0_TS_MASK; | 68 | #define LBZ OPCD( 34) |
39 | env->hflags &= ~HF_TS_MASK; | 69 | #define LHZ OPCD( 40) |
70 | #define LHA OPCD( 42) | ||
71 | #define LWZ OPCD( 32) | ||
72 | #define LWZUX XO31( 55) | ||
73 | -#define STB OPCD( 38) | ||
74 | -#define STH OPCD( 44) | ||
75 | -#define STW OPCD( 36) | ||
76 | - | ||
77 | -#define STD XO62( 0) | ||
78 | -#define STDU XO62( 1) | ||
79 | -#define STDX XO31(149) | ||
80 | - | ||
81 | #define LD XO58( 0) | ||
82 | #define LDX XO31( 21) | ||
83 | #define LDU XO58( 1) | ||
84 | #define LDUX XO31( 53) | ||
85 | #define LWA XO58( 2) | ||
86 | #define LWAX XO31(341) | ||
87 | +#define LQ OPCD( 56) | ||
88 | + | ||
89 | +#define STB OPCD( 38) | ||
90 | +#define STH OPCD( 44) | ||
91 | +#define STW OPCD( 36) | ||
92 | +#define STD XO62( 0) | ||
93 | +#define STDU XO62( 1) | ||
94 | +#define STDX XO31(149) | ||
95 | +#define STQ XO62( 2) | ||
96 | |||
97 | #define ADDIC OPCD( 12) | ||
98 | #define ADDI OPCD( 14) | ||
99 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
100 | |||
101 | bool tcg_target_has_memory_bswap(MemOp memop) | ||
102 | { | ||
103 | - return true; | ||
104 | + TCGAtomAlign aa; | ||
105 | + | ||
106 | + if ((memop & MO_SIZE) <= MO_64) { | ||
107 | + return true; | ||
108 | + } | ||
109 | + | ||
110 | + /* | ||
111 | + * Reject 16-byte memop with 16-byte atomicity, | ||
112 | + * but do allow a pair of 64-bit operations. | ||
113 | + */ | ||
114 | + aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true); | ||
115 | + return aa.atom <= MO_64; | ||
40 | } | 116 | } |
41 | - | 117 | |
42 | -void helper_reset_rf(CPUX86State *env) | 118 | /* |
43 | -{ | 119 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, |
44 | - env->eflags &= ~RF_MASK; | 120 | { |
45 | -} | 121 | TCGLabelQemuLdst *ldst = NULL; |
46 | - | 122 | MemOp opc = get_memop(oi); |
47 | -void helper_cli(CPUX86State *env) | 123 | - MemOp a_bits; |
48 | -{ | 124 | + MemOp a_bits, s_bits; |
49 | - env->eflags &= ~IF_MASK; | 125 | |
50 | -} | 126 | /* |
51 | - | 127 | * Book II, Section 1.4, Single-Copy Atomicity, specifies: |
52 | -void helper_sti(CPUX86State *env) | 128 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, |
53 | -{ | 129 | * As of 3.0, "the non-atomic access is performed as described in |
54 | - env->eflags |= IF_MASK; | 130 | * the corresponding list", which matches MO_ATOM_SUBALIGN. |
55 | -} | 131 | */ |
56 | - | 132 | + s_bits = opc & MO_SIZE; |
57 | -void helper_clac(CPUX86State *env) | 133 | h->aa = atom_and_align_for_opc(s, opc, |
58 | -{ | 134 | have_isa_3_00 ? MO_ATOM_SUBALIGN |
59 | - env->eflags &= ~AC_MASK; | 135 | : MO_ATOM_IFALIGN, |
60 | -} | 136 | - false); |
61 | - | 137 | + s_bits == MO_128); |
62 | -void helper_stac(CPUX86State *env) | 138 | a_bits = h->aa.align; |
63 | -{ | 139 | |
64 | - env->eflags |= AC_MASK; | 140 | #ifdef CONFIG_SOFTMMU |
65 | -} | 141 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, |
66 | - | 142 | int fast_off = TLB_MASK_TABLE_OFS(mem_index); |
67 | -#if 0 | 143 | int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); |
68 | -/* vm86plus instructions */ | 144 | int table_off = fast_off + offsetof(CPUTLBDescFast, table); |
69 | -void helper_cli_vm(CPUX86State *env) | 145 | - unsigned s_bits = opc & MO_SIZE; |
70 | -{ | 146 | |
71 | - env->eflags &= ~VIF_MASK; | 147 | ldst = new_ldst_label(s); |
72 | -} | 148 | ldst->is_ld = is_ld; |
73 | - | 149 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi, |
74 | -void helper_sti_vm(CPUX86State *env) | ||
75 | -{ | ||
76 | - env->eflags |= VIF_MASK; | ||
77 | - if (env->eflags & VIP_MASK) { | ||
78 | - raise_exception_ra(env, EXCP0D_GPF, GETPC()); | ||
79 | - } | ||
80 | -} | ||
81 | -#endif | ||
82 | diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c | ||
83 | index XXXXXXX..XXXXXXX 100644 | ||
84 | --- a/target/i386/tcg/translate.c | ||
85 | +++ b/target/i386/tcg/translate.c | ||
86 | @@ -XXX,XX +XXX,XX @@ static void gen_reset_hflag(DisasContext *s, uint32_t mask) | ||
87 | } | 150 | } |
88 | } | 151 | } |
89 | 152 | ||
90 | +static void gen_set_eflags(DisasContext *s, target_ulong mask) | 153 | +static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, |
154 | + TCGReg addr_reg, MemOpIdx oi, bool is_ld) | ||
91 | +{ | 155 | +{ |
92 | + TCGv t = tcg_temp_new(); | 156 | + TCGLabelQemuLdst *ldst; |
93 | + | 157 | + HostAddress h; |
94 | + tcg_gen_ld_tl(t, cpu_env, offsetof(CPUX86State, eflags)); | 158 | + bool need_bswap; |
95 | + tcg_gen_ori_tl(t, t, mask); | 159 | + uint32_t insn; |
96 | + tcg_gen_st_tl(t, cpu_env, offsetof(CPUX86State, eflags)); | 160 | + TCGReg index; |
97 | + tcg_temp_free(t); | 161 | + |
162 | + ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld); | ||
163 | + | ||
164 | + /* Compose the final address, as LQ/STQ have no indexing. */ | ||
165 | + index = h.index; | ||
166 | + if (h.base != 0) { | ||
167 | + index = TCG_REG_TMP1; | ||
168 | + tcg_out32(s, ADD | TAB(index, h.base, h.index)); | ||
169 | + } | ||
170 | + need_bswap = get_memop(oi) & MO_BSWAP; | ||
171 | + | ||
172 | + if (h.aa.atom == MO_128) { | ||
173 | + tcg_debug_assert(!need_bswap); | ||
174 | + tcg_debug_assert(datalo & 1); | ||
175 | + tcg_debug_assert(datahi == datalo - 1); | ||
176 | + insn = is_ld ? LQ : STQ; | ||
177 | + tcg_out32(s, insn | TAI(datahi, index, 0)); | ||
178 | + } else { | ||
179 | + TCGReg d1, d2; | ||
180 | + | ||
181 | + if (HOST_BIG_ENDIAN ^ need_bswap) { | ||
182 | + d1 = datahi, d2 = datalo; | ||
183 | + } else { | ||
184 | + d1 = datalo, d2 = datahi; | ||
185 | + } | ||
186 | + | ||
187 | + if (need_bswap) { | ||
188 | + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8); | ||
189 | + insn = is_ld ? LDBRX : STDBRX; | ||
190 | + tcg_out32(s, insn | TAB(d1, 0, index)); | ||
191 | + tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0)); | ||
192 | + } else { | ||
193 | + insn = is_ld ? LD : STD; | ||
194 | + tcg_out32(s, insn | TAI(d1, index, 0)); | ||
195 | + tcg_out32(s, insn | TAI(d2, index, 8)); | ||
196 | + } | ||
197 | + } | ||
198 | + | ||
199 | + if (ldst) { | ||
200 | + ldst->type = TCG_TYPE_I128; | ||
201 | + ldst->datalo_reg = datalo; | ||
202 | + ldst->datahi_reg = datahi; | ||
203 | + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); | ||
204 | + } | ||
98 | +} | 205 | +} |
99 | + | 206 | + |
100 | +static void gen_reset_eflags(DisasContext *s, target_ulong mask) | 207 | static void tcg_out_nop_fill(tcg_insn_unit *p, int count) |
101 | +{ | ||
102 | + TCGv t = tcg_temp_new(); | ||
103 | + | ||
104 | + tcg_gen_ld_tl(t, cpu_env, offsetof(CPUX86State, eflags)); | ||
105 | + tcg_gen_andi_tl(t, t, ~mask); | ||
106 | + tcg_gen_st_tl(t, cpu_env, offsetof(CPUX86State, eflags)); | ||
107 | + tcg_temp_free(t); | ||
108 | +} | ||
109 | + | ||
110 | /* Clear BND registers during legacy branches. */ | ||
111 | static void gen_bnd_jmp(DisasContext *s) | ||
112 | { | 208 | { |
113 | @@ -XXX,XX +XXX,XX @@ do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr) | 209 | int i; |
114 | } | 210 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, |
115 | 211 | args[4], TCG_TYPE_I64); | |
116 | if (s->base.tb->flags & HF_RF_MASK) { | ||
117 | - gen_helper_reset_rf(cpu_env); | ||
118 | + gen_reset_eflags(s, RF_MASK); | ||
119 | } | ||
120 | if (recheck_tf) { | ||
121 | gen_helper_rechecking_single_step(cpu_env); | ||
122 | @@ -XXX,XX +XXX,XX @@ static bool disas_insn(DisasContext *s, CPUState *cpu) | ||
123 | #endif | ||
124 | case 0xfa: /* cli */ | ||
125 | if (check_iopl(s)) { | ||
126 | - gen_helper_cli(cpu_env); | ||
127 | + gen_reset_eflags(s, IF_MASK); | ||
128 | } | 212 | } |
129 | break; | 213 | break; |
130 | case 0xfb: /* sti */ | 214 | + case INDEX_op_qemu_ld_a32_i128: |
131 | if (check_iopl(s)) { | 215 | + case INDEX_op_qemu_ld_a64_i128: |
132 | - gen_helper_sti(cpu_env); | 216 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); |
133 | + gen_set_eflags(s, IF_MASK); | 217 | + tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true); |
134 | /* interruptions are enabled only the first insn after sti */ | 218 | + break; |
135 | gen_update_eip_next(s); | 219 | |
136 | gen_eob_inhibit_irq(s, true); | 220 | case INDEX_op_qemu_st_a64_i32: |
137 | @@ -XXX,XX +XXX,XX @@ static bool disas_insn(DisasContext *s, CPUState *cpu) | 221 | if (TCG_TARGET_REG_BITS == 32) { |
138 | || CPL(s) != 0) { | 222 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, |
139 | goto illegal_op; | 223 | args[4], TCG_TYPE_I64); |
140 | } | 224 | } |
141 | - gen_helper_clac(cpu_env); | 225 | break; |
142 | + gen_reset_eflags(s, AC_MASK); | 226 | + case INDEX_op_qemu_st_a32_i128: |
143 | s->base.is_jmp = DISAS_EOB_NEXT; | 227 | + case INDEX_op_qemu_st_a64_i128: |
144 | break; | 228 | + tcg_debug_assert(TCG_TARGET_REG_BITS == 64); |
145 | 229 | + tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); | |
146 | @@ -XXX,XX +XXX,XX @@ static bool disas_insn(DisasContext *s, CPUState *cpu) | 230 | + break; |
147 | || CPL(s) != 0) { | 231 | |
148 | goto illegal_op; | 232 | case INDEX_op_setcond_i32: |
149 | } | 233 | tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], |
150 | - gen_helper_stac(cpu_env); | 234 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) |
151 | + gen_set_eflags(s, AC_MASK); | 235 | case INDEX_op_qemu_st_a64_i64: |
152 | s->base.is_jmp = DISAS_EOB_NEXT; | 236 | return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r); |
153 | break; | 237 | |
154 | 238 | + case INDEX_op_qemu_ld_a32_i128: | |
239 | + case INDEX_op_qemu_ld_a64_i128: | ||
240 | + return C_O2_I1(o, m, r); | ||
241 | + case INDEX_op_qemu_st_a32_i128: | ||
242 | + case INDEX_op_qemu_st_a64_i128: | ||
243 | + return C_O0_I3(o, m, r); | ||
244 | + | ||
245 | case INDEX_op_add_vec: | ||
246 | case INDEX_op_sub_vec: | ||
247 | case INDEX_op_mul_vec: | ||
155 | -- | 248 | -- |
156 | 2.34.1 | 249 | 2.34.1 |
157 | |||
158 | diff view generated by jsdifflib |
1 | Avoid cpu_restore_state, and modifying env->eip out from | 1 | Use LPQ/STPQ when 16-byte atomicity is required. |
---|---|---|---|
2 | underneath the translator with TARGET_TB_PCREL. There is | 2 | Note that these instructions require 16-byte alignment. |
3 | some slight duplication from x86_restore_state_to_opc, | ||
4 | but it's just a few lines. | ||
5 | 3 | ||
6 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1269 | 4 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
7 | Reviewed-by: Claudio Fontana <cfontana@suse.de> | ||
8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
9 | --- | 6 | --- |
10 | target/i386/helper.c | 21 +++++++++++++++++++-- | 7 | tcg/s390x/tcg-target-con-set.h | 2 + |
11 | 1 file changed, 19 insertions(+), 2 deletions(-) | 8 | tcg/s390x/tcg-target.h | 2 +- |
9 | tcg/s390x/tcg-target.c.inc | 107 ++++++++++++++++++++++++++++++++- | ||
10 | 3 files changed, 107 insertions(+), 4 deletions(-) | ||
12 | 11 | ||
13 | diff --git a/target/i386/helper.c b/target/i386/helper.c | 12 | diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h |
14 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/target/i386/helper.c | 14 | --- a/tcg/s390x/tcg-target-con-set.h |
16 | +++ b/target/i386/helper.c | 15 | +++ b/tcg/s390x/tcg-target-con-set.h |
17 | @@ -XXX,XX +XXX,XX @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank, | 16 | @@ -XXX,XX +XXX,XX @@ C_O0_I2(r, r) |
17 | C_O0_I2(r, ri) | ||
18 | C_O0_I2(r, rA) | ||
19 | C_O0_I2(v, r) | ||
20 | +C_O0_I3(o, m, r) | ||
21 | C_O1_I1(r, r) | ||
22 | C_O1_I1(v, r) | ||
23 | C_O1_I1(v, v) | ||
24 | @@ -XXX,XX +XXX,XX @@ C_O1_I2(v, v, v) | ||
25 | C_O1_I3(v, v, v, v) | ||
26 | C_O1_I4(r, r, ri, rI, r) | ||
27 | C_O1_I4(r, r, rA, rI, r) | ||
28 | +C_O2_I1(o, m, r) | ||
29 | C_O2_I2(o, m, 0, r) | ||
30 | C_O2_I2(o, m, r, r) | ||
31 | C_O2_I3(o, m, 0, 1, r) | ||
32 | diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h | ||
33 | index XXXXXXX..XXXXXXX 100644 | ||
34 | --- a/tcg/s390x/tcg-target.h | ||
35 | +++ b/tcg/s390x/tcg-target.h | ||
36 | @@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3]; | ||
37 | #define TCG_TARGET_HAS_muluh_i64 0 | ||
38 | #define TCG_TARGET_HAS_mulsh_i64 0 | ||
39 | |||
40 | -#define TCG_TARGET_HAS_qemu_ldst_i128 0 | ||
41 | +#define TCG_TARGET_HAS_qemu_ldst_i128 1 | ||
42 | |||
43 | #define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR) | ||
44 | #define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR) | ||
45 | diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc | ||
46 | index XXXXXXX..XXXXXXX 100644 | ||
47 | --- a/tcg/s390x/tcg-target.c.inc | ||
48 | +++ b/tcg/s390x/tcg-target.c.inc | ||
49 | @@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode { | ||
50 | RXY_LLGF = 0xe316, | ||
51 | RXY_LLGH = 0xe391, | ||
52 | RXY_LMG = 0xeb04, | ||
53 | + RXY_LPQ = 0xe38f, | ||
54 | RXY_LRV = 0xe31e, | ||
55 | RXY_LRVG = 0xe30f, | ||
56 | RXY_LRVH = 0xe31f, | ||
57 | @@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode { | ||
58 | RXY_STG = 0xe324, | ||
59 | RXY_STHY = 0xe370, | ||
60 | RXY_STMG = 0xeb24, | ||
61 | + RXY_STPQ = 0xe38e, | ||
62 | RXY_STRV = 0xe33e, | ||
63 | RXY_STRVG = 0xe32f, | ||
64 | RXY_STRVH = 0xe33f, | ||
65 | @@ -XXX,XX +XXX,XX @@ typedef struct { | ||
66 | |||
67 | bool tcg_target_has_memory_bswap(MemOp memop) | ||
68 | { | ||
69 | - return true; | ||
70 | + TCGAtomAlign aa; | ||
71 | + | ||
72 | + if ((memop & MO_SIZE) <= MO_64) { | ||
73 | + return true; | ||
74 | + } | ||
75 | + | ||
76 | + /* | ||
77 | + * Reject 16-byte memop with 16-byte atomicity, | ||
78 | + * but do allow a pair of 64-bit operations. | ||
79 | + */ | ||
80 | + aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true); | ||
81 | + return aa.atom <= MO_64; | ||
82 | } | ||
83 | |||
84 | static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data, | ||
85 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, | ||
86 | { | ||
87 | TCGLabelQemuLdst *ldst = NULL; | ||
88 | MemOp opc = get_memop(oi); | ||
89 | + MemOp s_bits = opc & MO_SIZE; | ||
90 | unsigned a_mask; | ||
91 | |||
92 | - h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false); | ||
93 | + h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128); | ||
94 | a_mask = (1 << h->aa.align) - 1; | ||
95 | |||
96 | #ifdef CONFIG_SOFTMMU | ||
97 | - unsigned s_bits = opc & MO_SIZE; | ||
98 | unsigned s_mask = (1 << s_bits) - 1; | ||
99 | int mem_index = get_mmuidx(oi); | ||
100 | int fast_off = TLB_MASK_TABLE_OFS(mem_index); | ||
101 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, | ||
18 | } | 102 | } |
19 | } | 103 | } |
20 | 104 | ||
21 | +static target_ulong get_memio_eip(CPUX86State *env) | 105 | +static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, |
106 | + TCGReg addr_reg, MemOpIdx oi, bool is_ld) | ||
22 | +{ | 107 | +{ |
23 | + uint64_t data[TARGET_INSN_START_WORDS]; | 108 | + TCGLabel *l1 = NULL, *l2 = NULL; |
24 | + CPUState *cs = env_cpu(env); | 109 | + TCGLabelQemuLdst *ldst; |
25 | + | 110 | + HostAddress h; |
26 | + if (!cpu_unwind_state_data(cs, cs->mem_io_pc, data)) { | 111 | + bool need_bswap; |
27 | + return env->eip; | 112 | + bool use_pair; |
28 | + } | 113 | + S390Opcode insn; |
29 | + | 114 | + |
30 | + /* Per x86_restore_state_to_opc. */ | 115 | + ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld); |
31 | + if (TARGET_TB_PCREL) { | 116 | + |
32 | + return (env->eip & TARGET_PAGE_MASK) | data[0]; | 117 | + use_pair = h.aa.atom < MO_128; |
33 | + } else { | 118 | + need_bswap = get_memop(oi) & MO_BSWAP; |
34 | + return data[0] - env->segs[R_CS].base; | 119 | + |
120 | + if (!use_pair) { | ||
121 | + /* | ||
122 | + * Atomicity requires we use LPQ. If we've already checked for | ||
123 | + * 16-byte alignment, that's all we need. If we arrive with | ||
124 | + * lesser alignment, we have determined that less than 16-byte | ||
125 | + * alignment can be satisfied with two 8-byte loads. | ||
126 | + */ | ||
127 | + if (h.aa.align < MO_128) { | ||
128 | + use_pair = true; | ||
129 | + l1 = gen_new_label(); | ||
130 | + l2 = gen_new_label(); | ||
131 | + | ||
132 | + tcg_out_insn(s, RI, TMLL, addr_reg, 15); | ||
133 | + tgen_branch(s, 7, l1); /* CC in {1,2,3} */ | ||
134 | + } | ||
135 | + | ||
136 | + tcg_debug_assert(!need_bswap); | ||
137 | + tcg_debug_assert(datalo & 1); | ||
138 | + tcg_debug_assert(datahi == datalo - 1); | ||
139 | + insn = is_ld ? RXY_LPQ : RXY_STPQ; | ||
140 | + tcg_out_insn_RXY(s, insn, datahi, h.base, h.index, h.disp); | ||
141 | + | ||
142 | + if (use_pair) { | ||
143 | + tgen_branch(s, S390_CC_ALWAYS, l2); | ||
144 | + tcg_out_label(s, l1); | ||
145 | + } | ||
146 | + } | ||
147 | + if (use_pair) { | ||
148 | + TCGReg d1, d2; | ||
149 | + | ||
150 | + if (need_bswap) { | ||
151 | + d1 = datalo, d2 = datahi; | ||
152 | + insn = is_ld ? RXY_LRVG : RXY_STRVG; | ||
153 | + } else { | ||
154 | + d1 = datahi, d2 = datalo; | ||
155 | + insn = is_ld ? RXY_LG : RXY_STG; | ||
156 | + } | ||
157 | + | ||
158 | + if (h.base == d1 || h.index == d1) { | ||
159 | + tcg_out_insn(s, RXY, LAY, TCG_TMP0, h.base, h.index, h.disp); | ||
160 | + h.base = TCG_TMP0; | ||
161 | + h.index = TCG_REG_NONE; | ||
162 | + h.disp = 0; | ||
163 | + } | ||
164 | + tcg_out_insn_RXY(s, insn, d1, h.base, h.index, h.disp); | ||
165 | + tcg_out_insn_RXY(s, insn, d2, h.base, h.index, h.disp + 8); | ||
166 | + } | ||
167 | + if (l2) { | ||
168 | + tcg_out_label(s, l2); | ||
169 | + } | ||
170 | + | ||
171 | + if (ldst) { | ||
172 | + ldst->type = TCG_TYPE_I128; | ||
173 | + ldst->datalo_reg = datalo; | ||
174 | + ldst->datahi_reg = datahi; | ||
175 | + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); | ||
35 | + } | 176 | + } |
36 | +} | 177 | +} |
37 | + | 178 | + |
38 | void cpu_report_tpr_access(CPUX86State *env, TPRAccess access) | 179 | static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) |
39 | { | 180 | { |
40 | X86CPU *cpu = env_archcpu(env); | 181 | /* Reuse the zeroing that exists for goto_ptr. */ |
41 | @@ -XXX,XX +XXX,XX @@ void cpu_report_tpr_access(CPUX86State *env, TPRAccess access) | 182 | @@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, |
42 | 183 | case INDEX_op_qemu_st_a64_i64: | |
43 | cpu_interrupt(cs, CPU_INTERRUPT_TPR); | 184 | tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64); |
44 | } else if (tcg_enabled()) { | 185 | break; |
45 | - cpu_restore_state(cs, cs->mem_io_pc, false); | 186 | + case INDEX_op_qemu_ld_a32_i128: |
46 | + target_ulong eip = get_memio_eip(env); | 187 | + case INDEX_op_qemu_ld_a64_i128: |
47 | 188 | + tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true); | |
48 | - apic_handle_tpr_access_report(cpu->apic_state, env->eip, access); | 189 | + break; |
49 | + apic_handle_tpr_access_report(cpu->apic_state, eip, access); | 190 | + case INDEX_op_qemu_st_a32_i128: |
50 | } | 191 | + case INDEX_op_qemu_st_a64_i128: |
51 | } | 192 | + tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false); |
52 | #endif /* !CONFIG_USER_ONLY */ | 193 | + break; |
194 | |||
195 | case INDEX_op_ld16s_i64: | ||
196 | tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]); | ||
197 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | ||
198 | case INDEX_op_qemu_st_a32_i32: | ||
199 | case INDEX_op_qemu_st_a64_i32: | ||
200 | return C_O0_I2(r, r); | ||
201 | + case INDEX_op_qemu_ld_a32_i128: | ||
202 | + case INDEX_op_qemu_ld_a64_i128: | ||
203 | + return C_O2_I1(o, m, r); | ||
204 | + case INDEX_op_qemu_st_a32_i128: | ||
205 | + case INDEX_op_qemu_st_a64_i128: | ||
206 | + return C_O0_I3(o, m, r); | ||
207 | |||
208 | case INDEX_op_deposit_i32: | ||
209 | case INDEX_op_deposit_i64: | ||
53 | -- | 210 | -- |
54 | 2.34.1 | 211 | 2.34.1 | diff view generated by jsdifflib |
1 | The value passed is always true. | 1 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
---|---|---|---|
2 | |||
3 | Reviewed-by: Claudio Fontana <cfontana@suse.de> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | --- | 3 | --- |
6 | accel/tcg/internal.h | 2 +- | 4 | .../generic/host/load-extract-al16-al8.h | 45 +++++++++++++++++++ |
7 | accel/tcg/tb-maint.c | 4 ++-- | 5 | accel/tcg/ldst_atomicity.c.inc | 36 +-------------- |
8 | accel/tcg/translate-all.c | 15 +++++++-------- | 6 | 2 files changed, 47 insertions(+), 34 deletions(-) |
9 | 3 files changed, 10 insertions(+), 11 deletions(-) | 7 | create mode 100644 host/include/generic/host/load-extract-al16-al8.h |
10 | 8 | ||
11 | diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h | 9 | diff --git a/host/include/generic/host/load-extract-al16-al8.h b/host/include/generic/host/load-extract-al16-al8.h |
10 | new file mode 100644 | ||
11 | index XXXXXXX..XXXXXXX | ||
12 | --- /dev/null | ||
13 | +++ b/host/include/generic/host/load-extract-al16-al8.h | ||
14 | @@ -XXX,XX +XXX,XX @@ | ||
15 | +/* | ||
16 | + * SPDX-License-Identifier: GPL-2.0-or-later | ||
17 | + * Atomic extract 64 from 128-bit, generic version. | ||
18 | + * | ||
19 | + * Copyright (C) 2023 Linaro, Ltd. | ||
20 | + */ | ||
21 | + | ||
22 | +#ifndef HOST_LOAD_EXTRACT_AL16_AL8_H | ||
23 | +#define HOST_LOAD_EXTRACT_AL16_AL8_H | ||
24 | + | ||
25 | +/** | ||
26 | + * load_atom_extract_al16_or_al8: | ||
27 | + * @pv: host address | ||
28 | + * @s: object size in bytes, @s <= 8. | ||
29 | + * | ||
30 | + * Load @s bytes from @pv, when pv % s != 0. If [pv, pv+s-1] does not | ||
31 | + * cross a 16-byte boundary then the access must be 16-byte atomic, | ||
32 | + * otherwise the access must be 8-byte atomic. | ||
33 | + */ | ||
34 | +static inline uint64_t ATTRIBUTE_ATOMIC128_OPT | ||
35 | +load_atom_extract_al16_or_al8(void *pv, int s) | ||
36 | +{ | ||
37 | + uintptr_t pi = (uintptr_t)pv; | ||
38 | + int o = pi & 7; | ||
39 | + int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8; | ||
40 | + Int128 r; | ||
41 | + | ||
42 | + pv = (void *)(pi & ~7); | ||
43 | + if (pi & 8) { | ||
44 | + uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8); | ||
45 | + uint64_t a = qatomic_read__nocheck(p8); | ||
46 | + uint64_t b = qatomic_read__nocheck(p8 + 1); | ||
47 | + | ||
48 | + if (HOST_BIG_ENDIAN) { | ||
49 | + r = int128_make128(b, a); | ||
50 | + } else { | ||
51 | + r = int128_make128(a, b); | ||
52 | + } | ||
53 | + } else { | ||
54 | + r = atomic16_read_ro(pv); | ||
55 | + } | ||
56 | + return int128_getlo(int128_urshift(r, shr)); | ||
57 | +} | ||
58 | + | ||
59 | +#endif /* HOST_LOAD_EXTRACT_AL16_AL8_H */ | ||
60 | diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc | ||
12 | index XXXXXXX..XXXXXXX 100644 | 61 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/accel/tcg/internal.h | 62 | --- a/accel/tcg/ldst_atomicity.c.inc |
14 | +++ b/accel/tcg/internal.h | 63 | +++ b/accel/tcg/ldst_atomicity.c.inc |
15 | @@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, | 64 | @@ -XXX,XX +XXX,XX @@ |
16 | tb_page_addr_t phys_page2); | 65 | * See the COPYING file in the top-level directory. |
17 | bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc); | 66 | */ |
18 | void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, | 67 | |
19 | - uintptr_t host_pc, bool reset_icount); | 68 | +#include "host/load-extract-al16-al8.h" |
20 | + uintptr_t host_pc); | 69 | + |
21 | 70 | #ifdef CONFIG_ATOMIC64 | |
22 | /* Return the current PC from CPU, which may be cached in TB. */ | 71 | # define HAVE_al8 true |
23 | static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb) | 72 | #else |
24 | diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c | 73 | @@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_extract_al16_or_exit(CPUArchState *env, uintptr_t ra, |
25 | index XXXXXXX..XXXXXXX 100644 | 74 | return int128_getlo(r); |
26 | --- a/accel/tcg/tb-maint.c | ||
27 | +++ b/accel/tcg/tb-maint.c | ||
28 | @@ -XXX,XX +XXX,XX @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, | ||
29 | * restore the CPU state. | ||
30 | */ | ||
31 | current_tb_modified = true; | ||
32 | - cpu_restore_state_from_tb(cpu, current_tb, retaddr, true); | ||
33 | + cpu_restore_state_from_tb(cpu, current_tb, retaddr); | ||
34 | } | ||
35 | #endif /* TARGET_HAS_PRECISE_SMC */ | ||
36 | tb_phys_invalidate__locked(tb); | ||
37 | @@ -XXX,XX +XXX,XX @@ bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc) | ||
38 | * function to partially restore the CPU state. | ||
39 | */ | ||
40 | current_tb_modified = true; | ||
41 | - cpu_restore_state_from_tb(cpu, current_tb, pc, true); | ||
42 | + cpu_restore_state_from_tb(cpu, current_tb, pc); | ||
43 | } | ||
44 | #endif /* TARGET_HAS_PRECISE_SMC */ | ||
45 | tb_phys_invalidate(tb, addr); | ||
46 | diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/accel/tcg/translate-all.c | ||
49 | +++ b/accel/tcg/translate-all.c | ||
50 | @@ -XXX,XX +XXX,XX @@ static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc, | ||
51 | } | 75 | } |
52 | 76 | ||
53 | /* | 77 | -/** |
54 | - * The cpu state corresponding to 'host_pc' is restored. | 78 | - * load_atom_extract_al16_or_al8: |
55 | - * When reset_icount is true, current TB will be interrupted and | 79 | - * @p: host address |
56 | - * icount should be recalculated. | 80 | - * @s: object size in bytes, @s <= 8. |
57 | + * The cpu state corresponding to 'host_pc' is restored in | 81 | - * |
58 | + * preparation for exiting the TB. | 82 | - * Load @s bytes from @p, when p % s != 0. If [p, p+s-1] does not |
59 | */ | 83 | - * cross an 16-byte boundary then the access must be 16-byte atomic, |
60 | void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, | 84 | - * otherwise the access must be 8-byte atomic. |
61 | - uintptr_t host_pc, bool reset_icount) | 85 | - */ |
62 | + uintptr_t host_pc) | 86 | -static inline uint64_t ATTRIBUTE_ATOMIC128_OPT |
63 | { | 87 | -load_atom_extract_al16_or_al8(void *pv, int s) |
64 | uint64_t data[TARGET_INSN_START_WORDS]; | 88 | -{ |
65 | #ifdef CONFIG_PROFILER | 89 | - uintptr_t pi = (uintptr_t)pv; |
66 | @@ -XXX,XX +XXX,XX @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, | 90 | - int o = pi & 7; |
67 | return; | 91 | - int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8; |
68 | } | 92 | - Int128 r; |
69 | 93 | - | |
70 | - if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) { | 94 | - pv = (void *)(pi & ~7); |
71 | + if (tb_cflags(tb) & CF_USE_ICOUNT) { | 95 | - if (pi & 8) { |
72 | assert(icount_enabled()); | 96 | - uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8); |
73 | /* | 97 | - uint64_t a = qatomic_read__nocheck(p8); |
74 | * Reset the cycle counter to the start of the block and | 98 | - uint64_t b = qatomic_read__nocheck(p8 + 1); |
75 | @@ -XXX,XX +XXX,XX @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc) | 99 | - |
76 | if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { | 100 | - if (HOST_BIG_ENDIAN) { |
77 | TranslationBlock *tb = tcg_tb_lookup(host_pc); | 101 | - r = int128_make128(b, a); |
78 | if (tb) { | 102 | - } else { |
79 | - cpu_restore_state_from_tb(cpu, tb, host_pc, true); | 103 | - r = int128_make128(a, b); |
80 | + cpu_restore_state_from_tb(cpu, tb, host_pc); | 104 | - } |
81 | return true; | 105 | - } else { |
82 | } | 106 | - r = atomic16_read_ro(pv); |
83 | } | 107 | - } |
84 | @@ -XXX,XX +XXX,XX @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr) | 108 | - return int128_getlo(int128_urshift(r, shr)); |
85 | tb = tcg_tb_lookup(retaddr); | 109 | -} |
86 | if (tb) { | 110 | - |
87 | /* We can use retranslation to find the PC. */ | 111 | /** |
88 | - cpu_restore_state_from_tb(cpu, tb, retaddr, true); | 112 | * load_atom_4_by_2: |
89 | + cpu_restore_state_from_tb(cpu, tb, retaddr); | 113 | * @pv: host address |
90 | tb_phys_invalidate(tb, -1); | ||
91 | } else { | ||
92 | /* The exception probably happened in a helper. The CPU state should | ||
93 | @@ -XXX,XX +XXX,XX @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) | ||
94 | cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p", | ||
95 | (void *)retaddr); | ||
96 | } | ||
97 | - cpu_restore_state_from_tb(cpu, tb, retaddr, true); | ||
98 | + cpu_restore_state_from_tb(cpu, tb, retaddr); | ||
99 | |||
100 | /* | ||
101 | * Some guests must re-execute the branch when re-executing a delay | ||
102 | -- | 114 | -- |
103 | 2.34.1 | 115 | 2.34.1 | diff view generated by jsdifflib |
1 | Add a way to examine the unwind data without actually | 1 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> |
---|---|---|---|
2 | restoring the data back into env. | ||
3 | |||
4 | Reviewed-by: Claudio Fontana <cfontana@suse.de> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | --- | 3 | --- |
7 | accel/tcg/internal.h | 4 +-- | 4 | host/include/generic/host/store-insert-al16.h | 50 +++++++++++++++++++ |
8 | include/exec/exec-all.h | 21 ++++++++--- | 5 | accel/tcg/ldst_atomicity.c.inc | 40 +-------------- |
9 | accel/tcg/translate-all.c | 74 ++++++++++++++++++++++++++------------- | 6 | 2 files changed, 51 insertions(+), 39 deletions(-) |
10 | 3 files changed, 68 insertions(+), 31 deletions(-) | 7 | create mode 100644 host/include/generic/host/store-insert-al16.h |
11 | 8 | ||
12 | diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h | 9 | diff --git a/host/include/generic/host/store-insert-al16.h b/host/include/generic/host/store-insert-al16.h |
13 | index XXXXXXX..XXXXXXX 100644 | 10 | new file mode 100644 |
14 | --- a/accel/tcg/internal.h | 11 | index XXXXXXX..XXXXXXX |
15 | +++ b/accel/tcg/internal.h | 12 | --- /dev/null |
16 | @@ -XXX,XX +XXX,XX @@ void tb_reset_jump(TranslationBlock *tb, int n); | 13 | +++ b/host/include/generic/host/store-insert-al16.h |
17 | TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, | 14 | @@ -XXX,XX +XXX,XX @@ |
18 | tb_page_addr_t phys_page2); | 15 | +/* |
19 | bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc); | 16 | + * SPDX-License-Identifier: GPL-2.0-or-later |
20 | -int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, | 17 | + * Atomic store insert into 128-bit, generic version. |
21 | - uintptr_t searched_pc, bool reset_icount); | 18 | + * |
22 | +void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, | 19 | + * Copyright (C) 2023 Linaro, Ltd. |
23 | + uintptr_t host_pc, bool reset_icount); | 20 | + */ |
24 | 21 | + | |
25 | /* Return the current PC from CPU, which may be cached in TB. */ | 22 | +#ifndef HOST_STORE_INSERT_AL16_H |
26 | static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb) | 23 | +#define HOST_STORE_INSERT_AL16_H |
27 | diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h | 24 | + |
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/include/exec/exec-all.h | ||
30 | +++ b/include/exec/exec-all.h | ||
31 | @@ -XXX,XX +XXX,XX @@ typedef ram_addr_t tb_page_addr_t; | ||
32 | #define TB_PAGE_ADDR_FMT RAM_ADDR_FMT | ||
33 | #endif | ||
34 | |||
35 | +/** | 25 | +/** |
36 | + * cpu_unwind_state_data: | 26 | + * store_atom_insert_al16: |
37 | + * @cpu: the cpu context | 27 | + * @ps: host address |
38 | + * @host_pc: the host pc within the translation | 28 | + * @val: shifted value to store |
39 | + * @data: output data | 29 | + * @msk: mask for value to store |
40 | + * | 30 | + * |
41 | + * Attempt to load the unwind state for a host pc occurring in | 31 | + * Atomically store @val to @ps masked by @msk. |
42 | + * translated code. If @host_pc is not in translated code, the | ||
43 | + * function returns false; otherwise @data is loaded. | ||
44 | + * This is the same unwind info as given to restore_state_to_opc. | ||
45 | + */ | 32 | + */ |
46 | +bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data); | 33 | +static inline void ATTRIBUTE_ATOMIC128_OPT |
34 | +store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk) | ||
35 | +{ | ||
36 | +#if defined(CONFIG_ATOMIC128) | ||
37 | + __uint128_t *pu; | ||
38 | + Int128Alias old, new; | ||
47 | + | 39 | + |
48 | /** | 40 | + /* With CONFIG_ATOMIC128, we can avoid the memory barriers. */ |
49 | * cpu_restore_state: | 41 | + pu = __builtin_assume_aligned(ps, 16); |
50 | - * @cpu: the vCPU state is to be restore to | 42 | + old.u = *pu; |
51 | - * @searched_pc: the host PC the fault occurred at | 43 | + msk = int128_not(msk); |
52 | + * @cpu: the cpu context | 44 | + do { |
53 | + * @host_pc: the host pc within the translation | 45 | + new.s = int128_and(old.s, msk); |
54 | * @will_exit: true if the TB executed will be interrupted after some | 46 | + new.s = int128_or(new.s, val); |
55 | cpu adjustments. Required for maintaining the correct | 47 | + } while (!__atomic_compare_exchange_n(pu, &old.u, new.u, true, |
56 | icount valus | 48 | + __ATOMIC_RELAXED, __ATOMIC_RELAXED)); |
57 | * @return: true if state was restored, false otherwise | 49 | +#else |
58 | * | 50 | + Int128 old, new, cmp; |
59 | * Attempt to restore the state for a fault occurring in translated | 51 | + |
60 | - * code. If the searched_pc is not in translated code no state is | 52 | + ps = __builtin_assume_aligned(ps, 16); |
61 | + * code. If @host_pc is not in translated code no state is | 53 | + old = *ps; |
62 | * restored and the function returns false. | 54 | + msk = int128_not(msk); |
63 | */ | 55 | + do { |
64 | -bool cpu_restore_state(CPUState *cpu, uintptr_t searched_pc, bool will_exit); | 56 | + cmp = old; |
65 | +bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit); | 57 | + new = int128_and(old, msk); |
66 | 58 | + new = int128_or(new, val); | |
67 | G_NORETURN void cpu_loop_exit_noexc(CPUState *cpu); | 59 | + old = atomic16_cmpxchg(ps, cmp, new); |
68 | G_NORETURN void cpu_loop_exit(CPUState *cpu); | 60 | + } while (int128_ne(cmp, old)); |
69 | diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c | 61 | +#endif |
70 | index XXXXXXX..XXXXXXX 100644 | ||
71 | --- a/accel/tcg/translate-all.c | ||
72 | +++ b/accel/tcg/translate-all.c | ||
73 | @@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block) | ||
74 | return p - block; | ||
75 | } | ||
76 | |||
77 | -/* The cpu state corresponding to 'searched_pc' is restored. | ||
78 | - * When reset_icount is true, current TB will be interrupted and | ||
79 | - * icount should be recalculated. | ||
80 | - */ | ||
81 | -int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, | ||
82 | - uintptr_t searched_pc, bool reset_icount) | ||
83 | +static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc, | ||
84 | + uint64_t *data) | ||
85 | { | ||
86 | - uint64_t data[TARGET_INSN_START_WORDS]; | ||
87 | - uintptr_t host_pc = (uintptr_t)tb->tc.ptr; | ||
88 | + uintptr_t iter_pc = (uintptr_t)tb->tc.ptr; | ||
89 | const uint8_t *p = tb->tc.ptr + tb->tc.size; | ||
90 | int i, j, num_insns = tb->icount; | ||
91 | -#ifdef CONFIG_PROFILER | ||
92 | - TCGProfile *prof = &tcg_ctx->prof; | ||
93 | - int64_t ti = profile_getclock(); | ||
94 | -#endif | ||
95 | |||
96 | - searched_pc -= GETPC_ADJ; | ||
97 | + host_pc -= GETPC_ADJ; | ||
98 | |||
99 | - if (searched_pc < host_pc) { | ||
100 | + if (host_pc < iter_pc) { | ||
101 | return -1; | ||
102 | } | ||
103 | |||
104 | - memset(data, 0, sizeof(data)); | ||
105 | + memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS); | ||
106 | if (!TARGET_TB_PCREL) { | ||
107 | data[0] = tb_pc(tb); | ||
108 | } | ||
109 | |||
110 | - /* Reconstruct the stored insn data while looking for the point at | ||
111 | - which the end of the insn exceeds the searched_pc. */ | ||
112 | + /* | ||
113 | + * Reconstruct the stored insn data while looking for the point | ||
114 | + * at which the end of the insn exceeds host_pc. | ||
115 | + */ | ||
116 | for (i = 0; i < num_insns; ++i) { | ||
117 | for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { | ||
118 | data[j] += decode_sleb128(&p); | ||
119 | } | ||
120 | - host_pc += decode_sleb128(&p); | ||
121 | - if (host_pc > searched_pc) { | ||
122 | - goto found; | ||
123 | + iter_pc += decode_sleb128(&p); | ||
124 | + if (iter_pc > host_pc) { | ||
125 | + return num_insns - i; | ||
126 | } | ||
127 | } | ||
128 | return -1; | ||
129 | +} | 62 | +} |
130 | + | 63 | + |
131 | +/* | 64 | +#endif /* HOST_STORE_INSERT_AL16_H */ |
132 | + * The cpu state corresponding to 'host_pc' is restored. | 65 | diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc |
133 | + * When reset_icount is true, current TB will be interrupted and | 66 | index XXXXXXX..XXXXXXX 100644 |
134 | + * icount should be recalculated. | 67 | --- a/accel/tcg/ldst_atomicity.c.inc |
135 | + */ | 68 | +++ b/accel/tcg/ldst_atomicity.c.inc |
136 | +void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, | 69 | @@ -XXX,XX +XXX,XX @@ |
137 | + uintptr_t host_pc, bool reset_icount) | 70 | */ |
138 | +{ | 71 | |
139 | + uint64_t data[TARGET_INSN_START_WORDS]; | 72 | #include "host/load-extract-al16-al8.h" |
140 | +#ifdef CONFIG_PROFILER | 73 | +#include "host/store-insert-al16.h" |
141 | + TCGProfile *prof = &tcg_ctx->prof; | 74 | |
142 | + int64_t ti = profile_getclock(); | 75 | #ifdef CONFIG_ATOMIC64 |
143 | +#endif | 76 | # define HAVE_al8 true |
144 | + int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data); | 77 | @@ -XXX,XX +XXX,XX @@ static void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk) |
145 | + | 78 | __ATOMIC_RELAXED, __ATOMIC_RELAXED)); |
146 | + if (insns_left < 0) { | ||
147 | + return; | ||
148 | + } | ||
149 | |||
150 | - found: | ||
151 | if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) { | ||
152 | assert(icount_enabled()); | ||
153 | - /* Reset the cycle counter to the start of the block | ||
154 | - and shift if to the number of actually executed instructions */ | ||
155 | - cpu_neg(cpu)->icount_decr.u16.low += num_insns - i; | ||
156 | + /* | ||
157 | + * Reset the cycle counter to the start of the block and | ||
158 | + * shift it to the number of actually executed instructions. | ||
159 | + */ | ||
160 | + cpu_neg(cpu)->icount_decr.u16.low += insns_left; | ||
161 | } | ||
162 | |||
163 | cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data); | ||
164 | @@ -XXX,XX +XXX,XX @@ int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, | ||
165 | prof->restore_time + profile_getclock() - ti); | ||
166 | qatomic_set(&prof->restore_count, prof->restore_count + 1); | ||
167 | #endif | ||
168 | - return 0; | ||
169 | } | 79 | } |
170 | 80 | ||
171 | bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) | 81 | -/** |
172 | @@ -XXX,XX +XXX,XX @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) | 82 | - * store_atom_insert_al16: |
173 | return false; | 83 | - * @p: host address |
174 | } | 84 | - * @val: shifted value to store |
175 | 85 | - * @msk: mask for value to store | |
176 | +bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data) | 86 | - * |
177 | +{ | 87 | - * Atomically store @val to @p masked by @msk. |
178 | + if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { | 88 | - */ |
179 | + TranslationBlock *tb = tcg_tb_lookup(host_pc); | 89 | -static void ATTRIBUTE_ATOMIC128_OPT |
180 | + if (tb) { | 90 | -store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk) |
181 | + return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0; | 91 | -{ |
182 | + } | 92 | -#if defined(CONFIG_ATOMIC128) |
183 | + } | 93 | - __uint128_t *pu, old, new; |
184 | + return false; | 94 | - |
185 | +} | 95 | - /* With CONFIG_ATOMIC128, we can avoid the memory barriers. */ |
186 | + | 96 | - pu = __builtin_assume_aligned(ps, 16); |
187 | void page_init(void) | 97 | - old = *pu; |
188 | { | 98 | - do { |
189 | page_size_init(); | 99 | - new = (old & ~msk.u) | val.u; |
100 | - } while (!__atomic_compare_exchange_n(pu, &old, new, true, | ||
101 | - __ATOMIC_RELAXED, __ATOMIC_RELAXED)); | ||
102 | -#elif defined(CONFIG_CMPXCHG128) | ||
103 | - __uint128_t *pu, old, new; | ||
104 | - | ||
105 | - /* | ||
106 | - * Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always | ||
107 | - * defer to libatomic, so we must use __sync_*_compare_and_swap_16 | ||
108 | - * and accept the sequential consistency that comes with it. | ||
109 | - */ | ||
110 | - pu = __builtin_assume_aligned(ps, 16); | ||
111 | - do { | ||
112 | - old = *pu; | ||
113 | - new = (old & ~msk.u) | val.u; | ||
114 | - } while (!__sync_bool_compare_and_swap_16(pu, old, new)); | ||
115 | -#else | ||
116 | - qemu_build_not_reached(); | ||
117 | -#endif | ||
118 | -} | ||
119 | - | ||
120 | /** | ||
121 | * store_bytes_leN: | ||
122 | * @pv: host address | ||
190 | -- | 123 | -- |
191 | 2.34.1 | 124 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
3 | --- | ||
4 | .../x86_64/host/load-extract-al16-al8.h | 50 +++++++++++++++++++ | ||
5 | 1 file changed, 50 insertions(+) | ||
6 | create mode 100644 host/include/x86_64/host/load-extract-al16-al8.h | ||
1 | 7 | ||
8 | diff --git a/host/include/x86_64/host/load-extract-al16-al8.h b/host/include/x86_64/host/load-extract-al16-al8.h | ||
9 | new file mode 100644 | ||
10 | index XXXXXXX..XXXXXXX | ||
11 | --- /dev/null | ||
12 | +++ b/host/include/x86_64/host/load-extract-al16-al8.h | ||
13 | @@ -XXX,XX +XXX,XX @@ | ||
14 | +/* | ||
15 | + * SPDX-License-Identifier: GPL-2.0-or-later | ||
16 | + * Atomic extract 64 from 128-bit, x86_64 version. | ||
17 | + * | ||
18 | + * Copyright (C) 2023 Linaro, Ltd. | ||
19 | + */ | ||
20 | + | ||
21 | +#ifndef X86_64_LOAD_EXTRACT_AL16_AL8_H | ||
22 | +#define X86_64_LOAD_EXTRACT_AL16_AL8_H | ||
23 | + | ||
24 | +#ifdef CONFIG_INT128_TYPE | ||
25 | +#include "host/cpuinfo.h" | ||
26 | + | ||
27 | +/** | ||
28 | + * load_atom_extract_al16_or_al8: | ||
29 | + * @pv: host address | ||
30 | + * @s: object size in bytes, @s <= 8. | ||
31 | + * | ||
32 | + * Load @s bytes from @pv, when pv % s != 0. If [pv, pv+s-1] does not | ||
33 | + * cross a 16-byte boundary then the access must be 16-byte atomic, | ||
34 | + * otherwise the access must be 8-byte atomic. | ||
35 | + */ | ||
36 | +static inline uint64_t ATTRIBUTE_ATOMIC128_OPT | ||
37 | +load_atom_extract_al16_or_al8(void *pv, int s) | ||
38 | +{ | ||
39 | + uintptr_t pi = (uintptr_t)pv; | ||
40 | + __int128_t *ptr_align = (__int128_t *)(pi & ~7); | ||
41 | + int shr = (pi & 7) * 8; | ||
42 | + Int128Alias r; | ||
43 | + | ||
44 | + /* | ||
45 | + * ptr_align % 16 is now only 0 or 8. | ||
46 | + * If the host supports atomic loads with VMOVDQU, then always use that, | ||
47 | + * making the branch highly predictable. Otherwise we must use VMOVDQA | ||
48 | + * when ptr_align % 16 == 0 for 16-byte atomicity. | ||
49 | + */ | ||
50 | + if ((cpuinfo & CPUINFO_ATOMIC_VMOVDQU) || (pi & 8)) { | ||
51 | + asm("vmovdqu %1, %0" : "=x" (r.i) : "m" (*ptr_align)); | ||
52 | + } else { | ||
53 | + asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr_align)); | ||
54 | + } | ||
55 | + return int128_getlo(int128_urshift(r.s, shr)); | ||
56 | +} | ||
57 | +#else | ||
58 | +/* Fallback definition that must be optimized away, or error. */ | ||
59 | +uint64_t QEMU_ERROR("unsupported atomic") | ||
60 | + load_atom_extract_al16_or_al8(void *pv, int s); | ||
61 | +#endif | ||
62 | + | ||
63 | +#endif /* X86_64_LOAD_EXTRACT_AL16_AL8_H */ | ||
64 | -- | ||
65 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
3 | --- | ||
4 | .../aarch64/host/load-extract-al16-al8.h | 40 +++++++++++++++++++ | ||
5 | 1 file changed, 40 insertions(+) | ||
6 | create mode 100644 host/include/aarch64/host/load-extract-al16-al8.h | ||
1 | 7 | ||
8 | diff --git a/host/include/aarch64/host/load-extract-al16-al8.h b/host/include/aarch64/host/load-extract-al16-al8.h | ||
9 | new file mode 100644 | ||
10 | index XXXXXXX..XXXXXXX | ||
11 | --- /dev/null | ||
12 | +++ b/host/include/aarch64/host/load-extract-al16-al8.h | ||
13 | @@ -XXX,XX +XXX,XX @@ | ||
14 | +/* | ||
15 | + * SPDX-License-Identifier: GPL-2.0-or-later | ||
16 | + * Atomic extract 64 from 128-bit, AArch64 version. | ||
17 | + * | ||
18 | + * Copyright (C) 2023 Linaro, Ltd. | ||
19 | + */ | ||
20 | + | ||
21 | +#ifndef AARCH64_LOAD_EXTRACT_AL16_AL8_H | ||
22 | +#define AARCH64_LOAD_EXTRACT_AL16_AL8_H | ||
23 | + | ||
24 | +#include "host/cpuinfo.h" | ||
25 | +#include "tcg/debug-assert.h" | ||
26 | + | ||
27 | +/** | ||
28 | + * load_atom_extract_al16_or_al8: | ||
29 | + * @pv: host address | ||
30 | + * @s: object size in bytes, @s <= 8. | ||
31 | + * | ||
32 | + * Load @s bytes from @pv, when pv % s != 0. If [pv, pv+s-1] does not | ||
33 | + * cross a 16-byte boundary then the access must be 16-byte atomic, | ||
34 | + * otherwise the access must be 8-byte atomic. | ||
35 | + */ | ||
36 | +static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s) | ||
37 | +{ | ||
38 | + uintptr_t pi = (uintptr_t)pv; | ||
39 | + __int128_t *ptr_align = (__int128_t *)(pi & ~7); | ||
40 | + int shr = (pi & 7) * 8; | ||
41 | + uint64_t l, h; | ||
42 | + | ||
43 | + /* | ||
44 | + * With FEAT_LSE2, LDP is single-copy atomic if 16-byte aligned | ||
45 | + * and single-copy atomic on the parts if 8-byte aligned. | ||
46 | + * All we need do is align the pointer mod 8. | ||
47 | + */ | ||
48 | + tcg_debug_assert(HAVE_ATOMIC128_RO); | ||
49 | + asm("ldp %0, %1, %2" : "=r"(l), "=r"(h) : "m"(*ptr_align)); | ||
50 | + return (l >> shr) | (h << (-shr & 63)); | ||
51 | +} | ||
52 | + | ||
53 | +#endif /* AARCH64_LOAD_EXTRACT_AL16_AL8_H */ | ||
54 | -- | ||
55 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
3 | --- | ||
4 | host/include/aarch64/host/store-insert-al16.h | 47 +++++++++++++++++++ | ||
5 | 1 file changed, 47 insertions(+) | ||
6 | create mode 100644 host/include/aarch64/host/store-insert-al16.h | ||
1 | 7 | ||
8 | diff --git a/host/include/aarch64/host/store-insert-al16.h b/host/include/aarch64/host/store-insert-al16.h | ||
9 | new file mode 100644 | ||
10 | index XXXXXXX..XXXXXXX | ||
11 | --- /dev/null | ||
12 | +++ b/host/include/aarch64/host/store-insert-al16.h | ||
13 | @@ -XXX,XX +XXX,XX @@ | ||
14 | +/* | ||
15 | + * SPDX-License-Identifier: GPL-2.0-or-later | ||
16 | + * Atomic store insert into 128-bit, AArch64 version. | ||
17 | + * | ||
18 | + * Copyright (C) 2023 Linaro, Ltd. | ||
19 | + */ | ||
20 | + | ||
21 | +#ifndef AARCH64_STORE_INSERT_AL16_H | ||
22 | +#define AARCH64_STORE_INSERT_AL16_H | ||
23 | + | ||
24 | +/** | ||
25 | + * store_atom_insert_al16: | ||
26 | + * @ps: host address | |
27 | + * @val: shifted value to store | |
28 | + * @msk: mask for value to store | |
29 | + * | |
30 | + * Atomically store @val to @ps masked by @msk. | |
31 | + */ | ||
32 | +static inline void ATTRIBUTE_ATOMIC128_OPT | ||
33 | +store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk) | ||
34 | +{ | ||
35 | + /* | ||
36 | + * GCC only implements __sync* primitives for int128 on aarch64. | ||
37 | + * We can do better without the barriers, and integrating the | ||
38 | + * arithmetic into the load-exclusive/store-conditional pair. | ||
39 | + */ | ||
40 | + uint64_t tl, th, vl, vh, ml, mh; | ||
41 | + uint32_t fail; | ||
42 | + | ||
43 | + qemu_build_assert(!HOST_BIG_ENDIAN); | ||
44 | + vl = int128_getlo(val); | ||
45 | + vh = int128_gethi(val); | ||
46 | + ml = int128_getlo(msk); | ||
47 | + mh = int128_gethi(msk); | ||
48 | + | ||
49 | + asm("0: ldxp %[l], %[h], %[mem]\n\t" | ||
50 | + "bic %[l], %[l], %[ml]\n\t" | ||
51 | + "bic %[h], %[h], %[mh]\n\t" | ||
52 | + "orr %[l], %[l], %[vl]\n\t" | ||
53 | + "orr %[h], %[h], %[vh]\n\t" | ||
54 | + "stxp %w[f], %[l], %[h], %[mem]\n\t" | ||
55 | + "cbnz %w[f], 0b\n" | ||
56 | + : [mem] "+Q"(*ps), [f] "=&r"(fail), [l] "=&r"(tl), [h] "=&r"(th) | ||
57 | + : [vl] "r"(vl), [vh] "r"(vh), [ml] "r"(ml), [mh] "r"(mh)); | ||
58 | +} | ||
59 | + | ||
60 | +#endif /* AARCH64_STORE_INSERT_AL16_H */ | ||
61 | -- | ||
62 | 2.34.1 | diff view generated by jsdifflib |
1 | We have called cpu_restore_state asserting will_exit. | 1 | The last use was removed by e77c89fb086a. |
---|---|---|---|
2 | Do not go back on that promise. This affects icount. | ||
3 | 2 | ||
3 | Fixes: e77c89fb086a ("cputlb: Remove static tlb sizing") | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
6 | --- | 6 | --- |
7 | target/openrisc/sys_helper.c | 2 +- | 7 | tcg/aarch64/tcg-target.h | 1 - |
8 | 1 file changed, 1 insertion(+), 1 deletion(-) | 8 | tcg/arm/tcg-target.h | 1 - |
9 | tcg/i386/tcg-target.h | 1 - | ||
10 | tcg/mips/tcg-target.h | 1 - | ||
11 | tcg/ppc/tcg-target.h | 1 - | ||
12 | tcg/riscv/tcg-target.h | 1 - | ||
13 | tcg/s390x/tcg-target.h | 1 - | ||
14 | tcg/sparc64/tcg-target.h | 1 - | ||
15 | tcg/tci/tcg-target.h | 1 - | ||
16 | 9 files changed, 9 deletions(-) | ||
9 | 17 | ||
10 | diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c | 18 | diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h |
11 | index XXXXXXX..XXXXXXX 100644 | 19 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/target/openrisc/sys_helper.c | 20 | --- a/tcg/aarch64/tcg-target.h |
13 | +++ b/target/openrisc/sys_helper.c | 21 | +++ b/tcg/aarch64/tcg-target.h |
14 | @@ -XXX,XX +XXX,XX @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb) | 22 | @@ -XXX,XX +XXX,XX @@ |
15 | if (env->pc != rb) { | 23 | #include "host/cpuinfo.h" |
16 | env->pc = rb; | 24 | |
17 | env->dflag = 0; | 25 | #define TCG_TARGET_INSN_UNIT_SIZE 4 |
18 | - cpu_loop_exit(cs); | 26 | -#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24 |
19 | } | 27 | #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) |
20 | + cpu_loop_exit(cs); | 28 | |
21 | break; | 29 | typedef enum { |
22 | 30 | diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h | |
23 | case TO_SPR(0, 17): /* SR */ | 31 | index XXXXXXX..XXXXXXX 100644 |
32 | --- a/tcg/arm/tcg-target.h | ||
33 | +++ b/tcg/arm/tcg-target.h | ||
34 | @@ -XXX,XX +XXX,XX @@ extern int arm_arch; | ||
35 | #define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7) | ||
36 | |||
37 | #define TCG_TARGET_INSN_UNIT_SIZE 4 | ||
38 | -#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 | ||
39 | #define MAX_CODE_GEN_BUFFER_SIZE UINT32_MAX | ||
40 | |||
41 | typedef enum { | ||
42 | diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/tcg/i386/tcg-target.h | ||
45 | +++ b/tcg/i386/tcg-target.h | ||
46 | @@ -XXX,XX +XXX,XX @@ | ||
47 | #include "host/cpuinfo.h" | ||
48 | |||
49 | #define TCG_TARGET_INSN_UNIT_SIZE 1 | ||
50 | -#define TCG_TARGET_TLB_DISPLACEMENT_BITS 31 | ||
51 | |||
52 | #ifdef __x86_64__ | ||
53 | # define TCG_TARGET_REG_BITS 64 | ||
54 | diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/tcg/mips/tcg-target.h | ||
57 | +++ b/tcg/mips/tcg-target.h | ||
58 | @@ -XXX,XX +XXX,XX @@ | ||
59 | #endif | ||
60 | |||
61 | #define TCG_TARGET_INSN_UNIT_SIZE 4 | ||
62 | -#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 | ||
63 | #define TCG_TARGET_NB_REGS 32 | ||
64 | |||
65 | #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) | ||
66 | diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h | ||
67 | index XXXXXXX..XXXXXXX 100644 | ||
68 | --- a/tcg/ppc/tcg-target.h | ||
69 | +++ b/tcg/ppc/tcg-target.h | ||
70 | @@ -XXX,XX +XXX,XX @@ | ||
71 | |||
72 | #define TCG_TARGET_NB_REGS 64 | ||
73 | #define TCG_TARGET_INSN_UNIT_SIZE 4 | ||
74 | -#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16 | ||
75 | |||
76 | typedef enum { | ||
77 | TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3, | ||
78 | diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h | ||
79 | index XXXXXXX..XXXXXXX 100644 | ||
80 | --- a/tcg/riscv/tcg-target.h | ||
81 | +++ b/tcg/riscv/tcg-target.h | ||
82 | @@ -XXX,XX +XXX,XX @@ | ||
83 | #define TCG_TARGET_REG_BITS 64 | ||
84 | |||
85 | #define TCG_TARGET_INSN_UNIT_SIZE 4 | ||
86 | -#define TCG_TARGET_TLB_DISPLACEMENT_BITS 20 | ||
87 | #define TCG_TARGET_NB_REGS 32 | ||
88 | #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) | ||
89 | |||
90 | diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h | ||
91 | index XXXXXXX..XXXXXXX 100644 | ||
92 | --- a/tcg/s390x/tcg-target.h | ||
93 | +++ b/tcg/s390x/tcg-target.h | ||
94 | @@ -XXX,XX +XXX,XX @@ | ||
95 | #define S390_TCG_TARGET_H | ||
96 | |||
97 | #define TCG_TARGET_INSN_UNIT_SIZE 2 | ||
98 | -#define TCG_TARGET_TLB_DISPLACEMENT_BITS 19 | ||
99 | |||
100 | /* We have a +- 4GB range on the branches; leave some slop. */ | ||
101 | #define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB) | ||
102 | diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h | ||
103 | index XXXXXXX..XXXXXXX 100644 | ||
104 | --- a/tcg/sparc64/tcg-target.h | ||
105 | +++ b/tcg/sparc64/tcg-target.h | ||
106 | @@ -XXX,XX +XXX,XX @@ | ||
107 | #define SPARC_TCG_TARGET_H | ||
108 | |||
109 | #define TCG_TARGET_INSN_UNIT_SIZE 4 | ||
110 | -#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32 | ||
111 | #define TCG_TARGET_NB_REGS 32 | ||
112 | #define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) | ||
113 | |||
114 | diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h | ||
115 | index XXXXXXX..XXXXXXX 100644 | ||
116 | --- a/tcg/tci/tcg-target.h | ||
117 | +++ b/tcg/tci/tcg-target.h | ||
118 | @@ -XXX,XX +XXX,XX @@ | ||
119 | |||
120 | #define TCG_TARGET_INTERPRETER 1 | ||
121 | #define TCG_TARGET_INSN_UNIT_SIZE 4 | ||
122 | -#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32 | ||
123 | #define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) | ||
124 | |||
125 | #if UINTPTR_MAX == UINT32_MAX | ||
24 | -- | 126 | -- |
25 | 2.34.1 | 127 | 2.34.1 |
26 | 128 | ||
27 | 129 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Invert the exit code, for use with the testsuite. | ||
1 | 2 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | --- | ||
5 | scripts/decodetree.py | 9 +++++++-- | ||
6 | 1 file changed, 7 insertions(+), 2 deletions(-) | ||
7 | |||
8 | diff --git a/scripts/decodetree.py b/scripts/decodetree.py | ||
9 | index XXXXXXX..XXXXXXX 100644 | ||
10 | --- a/scripts/decodetree.py | ||
11 | +++ b/scripts/decodetree.py | ||
12 | @@ -XXX,XX +XXX,XX @@ | ||
13 | formats = {} | ||
14 | allpatterns = [] | ||
15 | anyextern = False | ||
16 | +testforerror = False | ||
17 | |||
18 | translate_prefix = 'trans' | ||
19 | translate_scope = 'static ' | ||
20 | @@ -XXX,XX +XXX,XX @@ def error_with_file(file, lineno, *args): | ||
21 | if output_file and output_fd: | ||
22 | output_fd.close() | ||
23 | os.remove(output_file) | ||
24 | - exit(1) | ||
25 | + exit(0 if testforerror else 1) | ||
26 | # end error_with_file | ||
27 | |||
28 | |||
29 | @@ -XXX,XX +XXX,XX @@ def main(): | ||
30 | global bitop_width | ||
31 | global variablewidth | ||
32 | global anyextern | ||
33 | + global testforerror | ||
34 | |||
35 | decode_scope = 'static ' | ||
36 | |||
37 | long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=', | ||
38 | - 'static-decode=', 'varinsnwidth='] | ||
39 | + 'static-decode=', 'varinsnwidth=', 'test-for-error'] | ||
40 | try: | ||
41 | (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts) | ||
42 | except getopt.GetoptError as err: | ||
43 | @@ -XXX,XX +XXX,XX @@ def main(): | ||
44 | bitop_width = 64 | ||
45 | elif insnwidth != 32: | ||
46 | error(0, 'cannot handle insns of width', insnwidth) | ||
47 | + elif o == '--test-for-error': | ||
48 | + testforerror = True | ||
49 | else: | ||
50 | assert False, 'unhandled option' | ||
51 | |||
52 | @@ -XXX,XX +XXX,XX @@ def main(): | ||
53 | |||
54 | if output_file: | ||
55 | output_fd.close() | ||
56 | + exit(1 if testforerror else 0) | ||
57 | # end main | ||
58 | |||
59 | |||
60 | -- | ||
61 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Two copy-paste errors walking the parse tree. | ||
1 | 2 | ||
3 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
4 | --- | ||
5 | scripts/decodetree.py | 4 ++-- | ||
6 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
7 | |||
8 | diff --git a/scripts/decodetree.py b/scripts/decodetree.py | ||
9 | index XXXXXXX..XXXXXXX 100644 | ||
10 | --- a/scripts/decodetree.py | ||
11 | +++ b/scripts/decodetree.py | ||
12 | @@ -XXX,XX +XXX,XX @@ def build_tree(self): | ||
13 | |||
14 | def prop_format(self): | ||
15 | for p in self.pats: | ||
16 | - p.build_tree() | ||
17 | + p.prop_format() | ||
18 | |||
19 | def prop_width(self): | ||
20 | width = None | ||
21 | @@ -XXX,XX +XXX,XX @@ def __build_tree(pats, outerbits, outermask): | ||
22 | return t | ||
23 | |||
24 | def build_tree(self): | ||
25 | - super().prop_format() | ||
26 | + super().build_tree() | ||
27 | self.tree = self.__build_tree(self.pats, self.fixedbits, | ||
28 | self.fixedmask) | ||
29 | |||
30 | -- | ||
31 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Test err_pattern_group_empty.decode failed with exception: | ||
1 | 2 | ||
3 | Traceback (most recent call last): | ||
4 | File "./scripts/decodetree.py", line 1424, in <module> main() | ||
5 | File "./scripts/decodetree.py", line 1342, in main toppat.build_tree() | ||
6 | File "./scripts/decodetree.py", line 627, in build_tree | ||
7 | self.tree = self.__build_tree(self.pats, self.fixedbits, | ||
8 | File "./scripts/decodetree.py", line 607, in __build_tree | ||
9 | fb = i.fixedbits & innermask | ||
10 | TypeError: unsupported operand type(s) for &: 'NoneType' and 'int' | ||
11 | |||
12 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | --- | ||
14 | scripts/decodetree.py | 6 ++++++ | ||
15 | 1 file changed, 6 insertions(+) | ||
16 | |||
17 | diff --git a/scripts/decodetree.py b/scripts/decodetree.py | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/scripts/decodetree.py | ||
20 | +++ b/scripts/decodetree.py | ||
21 | @@ -XXX,XX +XXX,XX @@ def output_code(self, i, extracted, outerbits, outermask): | ||
22 | output(ind, '}\n') | ||
23 | else: | ||
24 | p.output_code(i, extracted, p.fixedbits, p.fixedmask) | ||
25 | + | ||
26 | + def build_tree(self): | ||
27 | + if not self.pats: | ||
28 | + error_with_file(self.file, self.lineno, 'empty pattern group') | ||
29 | + super().build_tree() | ||
30 | + | ||
31 | #end IncMultiPattern | ||
32 | |||
33 | |||
34 | -- | ||
35 | 2.34.1 | diff view generated by jsdifflib |
1 | Since we do not plan to exit, use cpu_unwind_state_data | 1 | Nor report any PermissionError on remove. |
---|---|---|---|
2 | and extract exactly the data requested. | 2 | The primary purpose is testing with -o /dev/null. |
3 | |||
4 | This is a bug fix, in that we no longer clobber dflag. | ||
5 | |||
6 | Consider: | ||
7 | |||
8 | l.j L2 // branch | ||
9 | l.mfspr r1, ppc // delay | ||
10 | |||
11 | L1: boom | ||
12 | L2: l.lwa r3, (r4) | ||
13 | |||
14 | Here, dflag would be set by cpu_restore_state (because that is the current | ||
15 | state of the cpu), but not cleared by tb_stop on exiting the TB | |
16 | (because DisasContext has recorded the current value as zero). | ||
17 | |||
18 | The next TB begins at L2 with dflag incorrectly set. If the load has a | ||
19 | tlb miss, then the exception will be delivered as per a delay slot: | ||
20 | with DSX set in the status register and PC decremented (delay slots | ||
21 | restart by re-executing the branch). This will cause the return from | ||
22 | interrupt to go to L1, and boom! | ||
23 | 3 | ||
24 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
25 | --- | 5 | --- |
26 | target/openrisc/sys_helper.c | 11 +++++++++-- | 6 | scripts/decodetree.py | 7 ++++++- |
27 | 1 file changed, 9 insertions(+), 2 deletions(-) | 7 | 1 file changed, 6 insertions(+), 1 deletion(-) |
28 | 8 | ||
29 | diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c | 9 | diff --git a/scripts/decodetree.py b/scripts/decodetree.py |
30 | index XXXXXXX..XXXXXXX 100644 | 10 | index XXXXXXX..XXXXXXX 100644 |
31 | --- a/target/openrisc/sys_helper.c | 11 | --- a/scripts/decodetree.py |
32 | +++ b/target/openrisc/sys_helper.c | 12 | +++ b/scripts/decodetree.py |
33 | @@ -XXX,XX +XXX,XX @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env, target_ulong rd, | 13 | @@ -XXX,XX +XXX,XX @@ def error_with_file(file, lineno, *args): |
34 | target_ulong spr) | 14 | |
35 | { | 15 | if output_file and output_fd: |
36 | #ifndef CONFIG_USER_ONLY | 16 | output_fd.close() |
37 | + uint64_t data[TARGET_INSN_START_WORDS]; | 17 | - os.remove(output_file) |
38 | MachineState *ms = MACHINE(qdev_get_machine()); | 18 | + # Do not try to remove e.g. -o /dev/null |
39 | OpenRISCCPU *cpu = env_archcpu(env); | 19 | + if not output_file.startswith("/dev"): |
40 | CPUState *cs = env_cpu(env); | 20 | + try: |
41 | @@ -XXX,XX +XXX,XX @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env, target_ulong rd, | 21 | + os.remove(output_file) |
42 | return env->evbar; | 22 | + except PermissionError: |
43 | 23 | + pass | |
44 | case TO_SPR(0, 16): /* NPC (equals PC) */ | 24 | exit(0 if testforerror else 1) |
45 | - cpu_restore_state(cs, GETPC(), false); | 25 | # end error_with_file |
46 | + if (cpu_unwind_state_data(cs, GETPC(), data)) { | 26 | |
47 | + return data[0]; | ||
48 | + } | ||
49 | return env->pc; | ||
50 | |||
51 | case TO_SPR(0, 17): /* SR */ | ||
52 | return cpu_get_sr(env); | ||
53 | |||
54 | case TO_SPR(0, 18): /* PPC */ | ||
55 | - cpu_restore_state(cs, GETPC(), false); | ||
56 | + if (cpu_unwind_state_data(cs, GETPC(), data)) { | ||
57 | + if (data[1] & 2) { | ||
58 | + return data[0] - 4; | ||
59 | + } | ||
60 | + } | ||
61 | return env->ppc; | ||
62 | |||
63 | case TO_SPR(0, 32): /* EPCR */ | ||
64 | -- | 27 | -- |
65 | 2.34.1 | 28 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
2 | --- | ||
3 | tests/decode/check.sh | 24 ---------------- | ||
4 | tests/decode/meson.build | 59 ++++++++++++++++++++++++++++++++++++++++ | ||
5 | tests/meson.build | 5 +--- | ||
6 | 3 files changed, 60 insertions(+), 28 deletions(-) | ||
7 | delete mode 100755 tests/decode/check.sh | ||
8 | create mode 100644 tests/decode/meson.build | ||
1 | 9 | ||
10 | diff --git a/tests/decode/check.sh b/tests/decode/check.sh | ||
11 | deleted file mode 100755 | ||
12 | index XXXXXXX..XXXXXXX | ||
13 | --- a/tests/decode/check.sh | ||
14 | +++ /dev/null | ||
15 | @@ -XXX,XX +XXX,XX @@ | ||
16 | -#!/bin/sh | ||
17 | -# This work is licensed under the terms of the GNU LGPL, version 2 or later. | ||
18 | -# See the COPYING.LIB file in the top-level directory. | ||
19 | - | ||
20 | -PYTHON=$1 | ||
21 | -DECODETREE=$2 | ||
22 | -E=0 | ||
23 | - | ||
24 | -# All of these tests should produce errors | ||
25 | -for i in err_*.decode; do | ||
26 | - if $PYTHON $DECODETREE $i > /dev/null 2> /dev/null; then | ||
27 | - # Pass, aka failed to fail. | ||
28 | - echo FAIL: $i 1>&2 | ||
29 | - E=1 | ||
30 | - fi | ||
31 | -done | ||
32 | - | ||
33 | -for i in succ_*.decode; do | ||
34 | - if ! $PYTHON $DECODETREE $i > /dev/null 2> /dev/null; then | ||
35 | - echo FAIL:$i 1>&2 | ||
36 | - fi | ||
37 | -done | ||
38 | - | ||
39 | -exit $E | ||
40 | diff --git a/tests/decode/meson.build b/tests/decode/meson.build | ||
41 | new file mode 100644 | ||
42 | index XXXXXXX..XXXXXXX | ||
43 | --- /dev/null | ||
44 | +++ b/tests/decode/meson.build | ||
45 | @@ -XXX,XX +XXX,XX @@ | ||
46 | +err_tests = [ | ||
47 | + 'err_argset1.decode', | ||
48 | + 'err_argset2.decode', | ||
49 | + 'err_field1.decode', | ||
50 | + 'err_field2.decode', | ||
51 | + 'err_field3.decode', | ||
52 | + 'err_field4.decode', | ||
53 | + 'err_field5.decode', | ||
54 | + 'err_field6.decode', | ||
55 | + 'err_init1.decode', | ||
56 | + 'err_init2.decode', | ||
57 | + 'err_init3.decode', | ||
58 | + 'err_init4.decode', | ||
59 | + 'err_overlap1.decode', | ||
60 | + 'err_overlap2.decode', | ||
61 | + 'err_overlap3.decode', | ||
62 | + 'err_overlap4.decode', | ||
63 | + 'err_overlap5.decode', | ||
64 | + 'err_overlap6.decode', | ||
65 | + 'err_overlap7.decode', | ||
66 | + 'err_overlap8.decode', | ||
67 | + 'err_overlap9.decode', | ||
68 | + 'err_pattern_group_empty.decode', | ||
69 | + 'err_pattern_group_ident1.decode', | ||
70 | + 'err_pattern_group_ident2.decode', | ||
71 | + 'err_pattern_group_nest1.decode', | ||
72 | + 'err_pattern_group_nest2.decode', | ||
73 | + 'err_pattern_group_nest3.decode', | ||
74 | + 'err_pattern_group_overlap1.decode', | ||
75 | + 'err_width1.decode', | ||
76 | + 'err_width2.decode', | ||
77 | + 'err_width3.decode', | ||
78 | + 'err_width4.decode', | ||
79 | +] | ||
80 | + | ||
81 | +succ_tests = [ | ||
82 | + 'succ_argset_type1.decode', | ||
83 | + 'succ_function.decode', | ||
84 | + 'succ_ident1.decode', | ||
85 | + 'succ_pattern_group_nest1.decode', | ||
86 | + 'succ_pattern_group_nest2.decode', | ||
87 | + 'succ_pattern_group_nest3.decode', | ||
88 | + 'succ_pattern_group_nest4.decode', | ||
89 | +] | ||
90 | + | ||
91 | +suite = 'decodetree' | ||
92 | +decodetree = find_program(meson.project_source_root() / 'scripts/decodetree.py') | ||
93 | + | ||
94 | +foreach t: err_tests | ||
95 | + test(fs.replace_suffix(t, ''), | ||
96 | + decodetree, args: ['-o', '/dev/null', '--test-for-error', files(t)], | ||
97 | + suite: suite) | ||
98 | +endforeach | ||
99 | + | ||
100 | +foreach t: succ_tests | ||
101 | + test(fs.replace_suffix(t, ''), | ||
102 | + decodetree, args: ['-o', '/dev/null', files(t)], | ||
103 | + suite: suite) | ||
104 | +endforeach | ||
105 | diff --git a/tests/meson.build b/tests/meson.build | ||
106 | index XXXXXXX..XXXXXXX 100644 | ||
107 | --- a/tests/meson.build | ||
108 | +++ b/tests/meson.build | ||
109 | @@ -XXX,XX +XXX,XX @@ if have_tools and have_vhost_user and 'CONFIG_LINUX' in config_host | ||
110 | dependencies: [qemuutil, vhost_user]) | ||
111 | endif | ||
112 | |||
113 | -test('decodetree', sh, | ||
114 | - args: [ files('decode/check.sh'), config_host['PYTHON'], files('../scripts/decodetree.py') ], | ||
115 | - workdir: meson.current_source_dir() / 'decode', | ||
116 | - suite: 'decodetree') | ||
117 | +subdir('decode') | ||
118 | |||
119 | if 'CONFIG_TCG' in config_all | ||
120 | subdir('fp') | ||
121 | -- | ||
122 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Peter Maydell <peter.maydell@linaro.org> | ||
1 | 2 | ||
3 | Document the named field syntax that we want to implement for the | ||
4 | decodetree script. This allows a field to be defined in terms of | ||
5 | some other field that the instruction pattern has already set, for | ||
6 | example: | ||
7 | |||
8 | %sz_imm 10:3 sz:3 !function=expand_sz_imm | ||
9 | |||
10 | to allow a function to be passed both an immediate field from the | ||
11 | instruction and also a sz value which might have been specified by | ||
12 | the instruction pattern directly (sz=1, etc) rather than being a | ||
13 | simple field within the instruction. | ||
14 | |||
15 | Note that the restriction on not having the format referring to the | ||
16 | pattern and the pattern referring to the format simultaneously is a | ||
17 | restriction of the decoder generator rather than inherently being a | ||
18 | silly thing to do. | ||
19 | |||
20 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
21 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
22 | Message-Id: <20230523120447.728365-3-peter.maydell@linaro.org> | ||
23 | --- | ||
24 | docs/devel/decodetree.rst | 33 ++++++++++++++++++++++++++++----- | ||
25 | 1 file changed, 28 insertions(+), 5 deletions(-) | ||
26 | |||
27 | diff --git a/docs/devel/decodetree.rst b/docs/devel/decodetree.rst | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/docs/devel/decodetree.rst | ||
30 | +++ b/docs/devel/decodetree.rst | ||
31 | @@ -XXX,XX +XXX,XX @@ Fields | ||
32 | |||
33 | Syntax:: | ||
34 | |||
35 | - field_def := '%' identifier ( unnamed_field )* ( !function=identifier )? | ||
36 | + field_def := '%' identifier ( field )* ( !function=identifier )? | ||
37 | + field := unnamed_field | named_field | ||
38 | unnamed_field := number ':' ( 's' )? number | |
39 | + named_field := identifier ':' ( 's' )? number | |
40 | |||
41 | For *unnamed_field*, the first number is the least-significant bit position | ||
42 | of the field and the second number is the length of the field. If the 's' is | ||
43 | -present, the field is considered signed. If multiple ``unnamed_fields`` are | ||
44 | -present, they are concatenated. In this way one can define disjoint fields. | ||
45 | +present, the field is considered signed. | ||
46 | + | ||
47 | +A *named_field* refers to some other field in the instruction pattern | ||
48 | +or format. Regardless of the length of the other field where it is | ||
49 | +defined, it will be inserted into this field with the specified | ||
50 | +signedness and bit width. | ||
51 | + | ||
52 | +Field definitions that involve loops (i.e. where a field is defined | ||
53 | +directly or indirectly in terms of itself) are errors. | ||
54 | + | ||
55 | +A format can include fields that refer to named fields that are | ||
56 | +defined in the instruction pattern(s) that use the format. | ||
57 | +Conversely, an instruction pattern can include fields that refer to | ||
58 | +named fields that are defined in the format it uses. However you | ||
59 | +cannot currently do both at once (i.e. pattern P uses format F; F has | ||
60 | +a field A that refers to a named field B that is defined in P, and P | ||
61 | +has a field C that refers to a named field D that is defined in F). | ||
62 | + | ||
63 | +If multiple ``fields`` are present, they are concatenated. | ||
64 | +In this way one can define disjoint fields. | ||
65 | |||
66 | If ``!function`` is specified, the concatenated result is passed through the | ||
67 | named function, taking and returning an integral value. | ||
68 | |||
69 | -One may use ``!function`` with zero ``unnamed_fields``. This case is called | ||
70 | +One may use ``!function`` with zero ``fields``. This case is called | ||
71 | a *parameter*, and the named function is only passed the ``DisasContext`` | ||
72 | and returns an integral value extracted from there. | ||
73 | |||
74 | -A field with no ``unnamed_fields`` and no ``!function`` is in error. | ||
75 | +A field with no ``fields`` and no ``!function`` is in error. | ||
76 | |||
77 | Field examples: | ||
78 | |||
79 | @@ -XXX,XX +XXX,XX @@ Field examples: | ||
80 | | %shimm8 5:s8 13:1 | expand_shimm8(sextract(i, 5, 8) << 1 | | | ||
81 | | !function=expand_shimm8 | extract(i, 13, 1)) | | ||
82 | +---------------------------+---------------------------------------------+ | ||
83 | +| %sz_imm 10:2 sz:3 | expand_sz_imm(extract(i, 10, 2) << 3 | | | ||
84 | +| !function=expand_sz_imm | extract(a->sz, 0, 3)) | | ||
85 | ++---------------------------+---------------------------------------------+ | ||
86 | |||
87 | Argument Sets | ||
88 | ============= | ||
89 | -- | ||
90 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Peter Maydell <peter.maydell@linaro.org> | ||
1 | 2 | ||
3 | To support referring to other named fields in field definitions, we | ||
4 | need to pass the str_extract() method a function which tells it how | ||
5 | to emit the code for a previously initialized named field. (In | ||
6 | Pattern::output_code() the other field will be "u.f_foo.field", and | ||
7 | in Format::output_extract() it is "a->field".) | ||
8 | |||
9 | Refactor the two callsites that currently do "output code to | ||
10 | initialize each field", and have them pass a lambda that defines how | ||
11 | to format the lvalue in each case. This is then used both in | ||
12 | emitting the LHS of the assignment and also passed down to | ||
13 | str_extract() as a new argument (unused at the moment, but will be | ||
14 | used in the following patch). | ||
15 | |||
16 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
17 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
18 | Message-Id: <20230523120447.728365-4-peter.maydell@linaro.org> | ||
19 | --- | ||
20 | scripts/decodetree.py | 26 +++++++++++++++----------- | ||
21 | 1 file changed, 15 insertions(+), 11 deletions(-) | ||
22 | |||
23 | diff --git a/scripts/decodetree.py b/scripts/decodetree.py | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/scripts/decodetree.py | ||
26 | +++ b/scripts/decodetree.py | ||
27 | @@ -XXX,XX +XXX,XX @@ def __str__(self): | ||
28 | s = '' | ||
29 | return str(self.pos) + ':' + s + str(self.len) | ||
30 | |||
31 | - def str_extract(self): | ||
32 | + def str_extract(self, lvalue_formatter): | ||
33 | global bitop_width | ||
34 | s = 's' if self.sign else '' | ||
35 | return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})' | ||
36 | @@ -XXX,XX +XXX,XX @@ def __init__(self, subs, mask): | ||
37 | def __str__(self): | ||
38 | return str(self.subs) | ||
39 | |||
40 | - def str_extract(self): | ||
41 | + def str_extract(self, lvalue_formatter): | ||
42 | global bitop_width | ||
43 | ret = '0' | ||
44 | pos = 0 | ||
45 | for f in reversed(self.subs): | ||
46 | - ext = f.str_extract() | ||
47 | + ext = f.str_extract(lvalue_formatter) | ||
48 | if pos == 0: | ||
49 | ret = ext | ||
50 | else: | ||
51 | @@ -XXX,XX +XXX,XX @@ def __init__(self, value): | ||
52 | def __str__(self): | ||
53 | return str(self.value) | ||
54 | |||
55 | - def str_extract(self): | ||
56 | + def str_extract(self, lvalue_formatter): | ||
57 | return str(self.value) | ||
58 | |||
59 | def __cmp__(self, other): | ||
60 | @@ -XXX,XX +XXX,XX @@ def __init__(self, func, base): | ||
61 | def __str__(self): | ||
62 | return self.func + '(' + str(self.base) + ')' | ||
63 | |||
64 | - def str_extract(self): | ||
65 | - return self.func + '(ctx, ' + self.base.str_extract() + ')' | ||
66 | + def str_extract(self, lvalue_formatter): | ||
67 | + return (self.func + '(ctx, ' | ||
68 | + + self.base.str_extract(lvalue_formatter) + ')') | ||
69 | |||
70 | def __eq__(self, other): | ||
71 | return self.func == other.func and self.base == other.base | ||
72 | @@ -XXX,XX +XXX,XX @@ def __init__(self, func): | ||
73 | def __str__(self): | ||
74 | return self.func | ||
75 | |||
76 | - def str_extract(self): | ||
77 | + def str_extract(self, lvalue_formatter): | ||
78 | return self.func + '(ctx)' | ||
79 | |||
80 | def __eq__(self, other): | ||
81 | @@ -XXX,XX +XXX,XX @@ def __str__(self): | ||
82 | |||
83 | def str1(self, i): | ||
84 | return str_indent(i) + self.__str__() | ||
85 | + | ||
86 | + def output_fields(self, indent, lvalue_formatter): | ||
87 | + for n, f in self.fields.items(): | ||
88 | + output(indent, lvalue_formatter(n), ' = ', | ||
89 | + f.str_extract(lvalue_formatter), ';\n') | ||
90 | # end General | ||
91 | |||
92 | |||
93 | @@ -XXX,XX +XXX,XX @@ def extract_name(self): | ||
94 | def output_extract(self): | ||
95 | output('static void ', self.extract_name(), '(DisasContext *ctx, ', | ||
96 | self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n') | ||
97 | - for n, f in self.fields.items(): | ||
98 | - output(' a->', n, ' = ', f.str_extract(), ';\n') | ||
99 | + self.output_fields(str_indent(4), lambda n: 'a->' + n) | ||
100 | output('}\n\n') | ||
101 | # end Format | ||
102 | |||
103 | @@ -XXX,XX +XXX,XX @@ def output_code(self, i, extracted, outerbits, outermask): | ||
104 | if not extracted: | ||
105 | output(ind, self.base.extract_name(), | ||
106 | '(ctx, &u.f_', arg, ', insn);\n') | ||
107 | - for n, f in self.fields.items(): | ||
108 | - output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n') | ||
109 | + self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n) | ||
110 | output(ind, 'if (', translate_prefix, '_', self.name, | ||
111 | '(ctx, &u.f_', arg, ')) return true;\n') | ||
112 | |||
113 | -- | ||
114 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Peter Maydell <peter.maydell@linaro.org> | ||
1 | 2 | ||
3 | To support named fields, we will need to be able to do a topological | ||
4 | sort (so that we ensure that we output the assignment to field A | ||
5 | before the assignment to field B if field B refers to field A by | ||
6 | name). The good news is that there is a tsort in the python standard | ||
7 | library; the bad news is that it was only added in Python 3.9. | ||
8 | |||
9 | To bridge the gap between our current minimum supported Python | ||
10 | version and 3.9, provide a local implementation that has the | ||
11 | same API as the stdlib version for the parts we care about. | ||
12 | In future when QEMU's minimum Python version requirement reaches | ||
13 | 3.9 we can delete this code and replace it with an 'import' line. | ||
14 | |||
15 | The core of this implementation is based on | ||
16 | https://code.activestate.com/recipes/578272-topological-sort/ | ||
17 | which is MIT-licensed. | ||
18 | |||
19 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
20 | Acked-by: Richard Henderson <richard.henderson@linaro.org> | ||
21 | Message-Id: <20230523120447.728365-5-peter.maydell@linaro.org> | ||
22 | --- | ||
23 | scripts/decodetree.py | 74 +++++++++++++++++++++++++++++++++++++++++++ | ||
24 | 1 file changed, 74 insertions(+) | ||
25 | |||
26 | diff --git a/scripts/decodetree.py b/scripts/decodetree.py | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/scripts/decodetree.py | ||
29 | +++ b/scripts/decodetree.py | ||
30 | @@ -XXX,XX +XXX,XX @@ | ||
31 | re_fmt_ident = '@[a-zA-Z0-9_]*' | ||
32 | re_pat_ident = '[a-zA-Z0-9_]*' | ||
33 | |||
34 | +# Local implementation of a topological sort. We use the same API that | ||
35 | +# the Python graphlib does, so that when QEMU moves forward to a | ||
36 | +# baseline of Python 3.9 or newer this code can all be dropped and | ||
37 | +# replaced with: | ||
38 | +# from graphlib import TopologicalSorter, CycleError | ||
39 | +# | ||
40 | +# https://docs.python.org/3.9/library/graphlib.html#graphlib.TopologicalSorter | ||
41 | +# | ||
42 | +# We only implement the parts of TopologicalSorter we care about: | ||
43 | +# ts = TopologicalSorter(graph=None) | ||
44 | +# create the sorter. graph is a dictionary whose keys are | ||
45 | +# nodes and whose values are lists of the predecessors of that node. | ||
46 | +# (That is, if graph contains "A" -> ["B", "C"] then we must output | ||
47 | +# B and C before A.) | ||
48 | +# ts.static_order() | ||
49 | +# returns a list of all the nodes in sorted order, or raises CycleError | ||
50 | +# CycleError | ||
51 | +# exception raised if there are cycles in the graph. The second | ||
52 | +# element in the args attribute is a list of nodes which form a | ||
53 | +# cycle; the first and last element are the same, eg [a, b, c, a] | ||
54 | +# (Our implementation doesn't give the order correctly.) | ||
55 | +# | ||
56 | +# For our purposes we can assume that the data set is always small | ||
57 | +# (typically 10 nodes or less, actual links in the graph very rare), | ||
58 | +# so we don't need to worry about efficiency of implementation. | ||
59 | +# | ||
60 | +# The core of this implementation is from | ||
61 | +# https://code.activestate.com/recipes/578272-topological-sort/ | ||
62 | +# (but updated to Python 3), and is under the MIT license. | ||
63 | + | ||
64 | +class CycleError(ValueError): | ||
65 | + """Subclass of ValueError raised if cycles exist in the graph""" | ||
66 | + pass | ||
67 | + | ||
68 | +class TopologicalSorter: | ||
69 | + """Topologically sort a graph""" | ||
70 | + def __init__(self, graph=None): | ||
71 | + self.graph = graph | ||
72 | + | ||
73 | + def static_order(self): | ||
74 | + # We do the sort right here, unlike the stdlib version | ||
75 | + from functools import reduce | ||
76 | + data = {} | ||
77 | + r = [] | ||
78 | + | ||
79 | + if not self.graph: | ||
80 | + return [] | ||
81 | + | ||
82 | + # This code wants the values in the dict to be specifically sets | ||
83 | + for k, v in self.graph.items(): | ||
84 | + data[k] = set(v) | ||
85 | + | ||
86 | + # Find all items that don't depend on anything. | ||
87 | + extra_items_in_deps = (reduce(set.union, data.values()) | ||
88 | + - set(data.keys())) | ||
89 | + # Add empty dependencies where needed | ||
90 | + data.update({item:{} for item in extra_items_in_deps}) | ||
91 | + while True: | ||
92 | + ordered = set(item for item, dep in data.items() if not dep) | ||
93 | + if not ordered: | ||
94 | + break | ||
95 | + r.extend(ordered) | ||
96 | + data = {item: (dep - ordered) | ||
97 | + for item, dep in data.items() | ||
98 | + if item not in ordered} | ||
99 | + if data: | ||
100 | + # This doesn't give as nice results as the stdlib, which | ||
101 | + # gives you the cycle by listing the nodes in order. Here | ||
102 | + # we only know the nodes in the cycle but not their order. | ||
103 | + raise CycleError(f'nodes are in a cycle', list(data.keys())) | ||
104 | + | ||
105 | + return r | ||
106 | +# end TopologicalSorter | ||
107 | + | ||
108 | def error_with_file(file, lineno, *args): | ||
109 | """Print an error message from file:line and args and exit.""" | ||
110 | global output_file | ||
111 | -- | ||
112 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | From: Peter Maydell <peter.maydell@linaro.org> | |
2 | |||
3 | Implement support for named fields, i.e. where one field is defined | ||
4 | in terms of another, rather than directly in terms of bits extracted | ||
5 | from the instruction. | ||
6 | |||
7 | The new method referenced_fields() on all the Field classes returns a | ||
8 | list of fields that this field references. This just passes through, | ||
9 | except for the new NamedField class. | ||
10 | |||
11 | We can then use referenced_fields() to: | ||
12 | * construct a list of 'dangling references' for a format or | ||
13 | pattern, which is the fields that the format/pattern uses but | ||
14 | doesn't define itself | ||
15 | * do a topological sort, so that we output "field = value" | ||
16 | assignments in an order that means that we assign a field before | ||
17 | we reference it in a subsequent assignment | ||
18 | * check when we output the code for a pattern whether we need to | ||
19 | fill in the format fields before or after the pattern fields, and | ||
20 | do other error checking | ||
21 | |||
22 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
23 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
24 | Message-Id: <20230523120447.728365-6-peter.maydell@linaro.org> | ||
25 | --- | ||
26 | scripts/decodetree.py | 145 ++++++++++++++++++++++++++++++++++++++++-- | ||
27 | 1 file changed, 139 insertions(+), 6 deletions(-) | ||
28 | |||
29 | diff --git a/scripts/decodetree.py b/scripts/decodetree.py | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/scripts/decodetree.py | ||
32 | +++ b/scripts/decodetree.py | ||
33 | @@ -XXX,XX +XXX,XX @@ def str_extract(self, lvalue_formatter): | ||
34 | s = 's' if self.sign else '' | ||
35 | return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})' | ||
36 | |||
37 | + def referenced_fields(self): | ||
38 | + return [] | ||
39 | + | ||
40 | def __eq__(self, other): | ||
41 | return self.sign == other.sign and self.mask == other.mask | ||
42 | |||
43 | @@ -XXX,XX +XXX,XX @@ def str_extract(self, lvalue_formatter): | ||
44 | pos += f.len | ||
45 | return ret | ||
46 | |||
47 | + def referenced_fields(self): | ||
48 | + l = [] | ||
49 | + for f in self.subs: | ||
50 | + l.extend(f.referenced_fields()) | ||
51 | + return l | ||
52 | + | ||
53 | def __ne__(self, other): | ||
54 | if len(self.subs) != len(other.subs): | ||
55 | return True | ||
56 | @@ -XXX,XX +XXX,XX @@ def __str__(self): | ||
57 | def str_extract(self, lvalue_formatter): | ||
58 | return str(self.value) | ||
59 | |||
60 | + def referenced_fields(self): | ||
61 | + return [] | ||
62 | + | ||
63 | def __cmp__(self, other): | ||
64 | return self.value - other.value | ||
65 | # end ConstField | ||
66 | @@ -XXX,XX +XXX,XX @@ def str_extract(self, lvalue_formatter): | ||
67 | return (self.func + '(ctx, ' | ||
68 | + self.base.str_extract(lvalue_formatter) + ')') | ||
69 | |||
70 | + def referenced_fields(self): | ||
71 | + return self.base.referenced_fields() | ||
72 | + | ||
73 | def __eq__(self, other): | ||
74 | return self.func == other.func and self.base == other.base | ||
75 | |||
76 | @@ -XXX,XX +XXX,XX @@ def __str__(self): | ||
77 | def str_extract(self, lvalue_formatter): | ||
78 | return self.func + '(ctx)' | ||
79 | |||
80 | + def referenced_fields(self): | ||
81 | + return [] | ||
82 | + | ||
83 | def __eq__(self, other): | ||
84 | return self.func == other.func | ||
85 | |||
86 | @@ -XXX,XX +XXX,XX @@ def __ne__(self, other): | ||
87 | return not self.__eq__(other) | ||
88 | # end ParameterField | ||
89 | |||
90 | +class NamedField: | ||
91 | + """Class representing a field already named in the pattern""" | ||
92 | + def __init__(self, name, sign, len): | ||
93 | + self.mask = 0 | ||
94 | + self.sign = sign | ||
95 | + self.len = len | ||
96 | + self.name = name | ||
97 | + | ||
98 | + def __str__(self): | ||
99 | + return self.name | ||
100 | + | ||
101 | + def str_extract(self, lvalue_formatter): | ||
102 | + global bitop_width | ||
103 | + s = 's' if self.sign else '' | ||
104 | + lvalue = lvalue_formatter(self.name) | ||
105 | + return f'{s}extract{bitop_width}({lvalue}, 0, {self.len})' | ||
106 | + | ||
107 | + def referenced_fields(self): | ||
108 | + return [self.name] | ||
109 | + | ||
110 | + def __eq__(self, other): | ||
111 | + return self.name == other.name | ||
112 | + | ||
113 | + def __ne__(self, other): | ||
114 | + return not self.__eq__(other) | ||
115 | +# end NamedField | ||
116 | |||
117 | class Arguments: | ||
118 | """Class representing the extracted fields of a format""" | ||
119 | @@ -XXX,XX +XXX,XX @@ def output_def(self): | ||
120 | output('} ', self.struct_name(), ';\n\n') | ||
121 | # end Arguments | ||
122 | |||
123 | - | ||
124 | class General: | ||
125 | """Common code between instruction formats and instruction patterns""" | ||
126 | def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): | ||
127 | @@ -XXX,XX +XXX,XX @@ def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): | ||
128 | self.fieldmask = fldm | ||
129 | self.fields = flds | ||
130 | self.width = w | ||
131 | + self.dangling = None | ||
132 | |||
133 | def __str__(self): | ||
134 | return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask) | ||
135 | @@ -XXX,XX +XXX,XX @@ def __str__(self): | ||
136 | def str1(self, i): | ||
137 | return str_indent(i) + self.__str__() | ||
138 | |||
139 | + def dangling_references(self): | ||
140 | + # Return a list of all named references which aren't satisfied | ||
141 | + # directly by this format/pattern. This will be either: | ||
142 | + # * a format referring to a field which is specified by the | ||
143 | + # pattern(s) using it | ||
144 | + # * a pattern referring to a field which is specified by the | ||
145 | + # format it uses | ||
146 | + # * a user error (referring to a field that doesn't exist at all) | ||
147 | + if self.dangling is None: | ||
148 | + # Compute this once and cache the answer | ||
149 | + dangling = [] | ||
150 | + for n, f in self.fields.items(): | ||
151 | + for r in f.referenced_fields(): | ||
152 | + if r not in self.fields: | ||
153 | + dangling.append(r) | ||
154 | + self.dangling = dangling | ||
155 | + return self.dangling | ||
156 | + | ||
157 | def output_fields(self, indent, lvalue_formatter): | ||
158 | + # We use a topological sort to ensure that any use of NamedField | ||
159 | + # comes after the initialization of the field it is referencing. | ||
160 | + graph = {} | ||
161 | for n, f in self.fields.items(): | ||
162 | - output(indent, lvalue_formatter(n), ' = ', | ||
163 | - f.str_extract(lvalue_formatter), ';\n') | ||
164 | + refs = f.referenced_fields() | ||
165 | + graph[n] = refs | ||
166 | + | ||
167 | + try: | ||
168 | + ts = TopologicalSorter(graph) | ||
169 | + for n in ts.static_order(): | ||
170 | + # We only want to emit assignments for the keys | ||
171 | + # in our fields list, not for anything that ends up | ||
172 | + # in the tsort graph only because it was referenced as | ||
173 | + # a NamedField. | ||
174 | + try: | ||
175 | + f = self.fields[n] | ||
176 | + output(indent, lvalue_formatter(n), ' = ', | ||
177 | + f.str_extract(lvalue_formatter), ';\n') | ||
178 | + except KeyError: | ||
179 | + pass | ||
180 | + except CycleError as e: | ||
181 | + # The second element of args is a list of nodes which form | ||
182 | + # a cycle (there might be others too, but only one is reported). | ||
183 | + # Pretty-print it to tell the user. | ||
184 | + cycle = ' => '.join(e.args[1]) | ||
185 | + error(self.lineno, 'field definitions form a cycle: ' + cycle) | ||
186 | # end General | ||
187 | |||
188 | |||
189 | @@ -XXX,XX +XXX,XX @@ def output_code(self, i, extracted, outerbits, outermask): | ||
190 | ind = str_indent(i) | ||
191 | arg = self.base.base.name | ||
192 | output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n') | ||
193 | + # We might have named references in the format that refer to fields | ||
194 | + # in the pattern, or named references in the pattern that refer | ||
195 | + # to fields in the format. This affects whether we extract the fields | ||
196 | + # for the format before or after the ones for the pattern. | ||
197 | + # For simplicity we don't allow cross references in both directions. | ||
198 | + # This is also where we catch the syntax error of referring to | ||
199 | + # a nonexistent field. | ||
200 | + fmt_refs = self.base.dangling_references() | ||
201 | + for r in fmt_refs: | ||
202 | + if r not in self.fields: | ||
203 | + error(self.lineno, f'format refers to undefined field {r}') | ||
204 | + pat_refs = self.dangling_references() | ||
205 | + for r in pat_refs: | ||
206 | + if r not in self.base.fields: | ||
207 | + error(self.lineno, f'pattern refers to undefined field {r}') | ||
208 | + if pat_refs and fmt_refs: | ||
209 | + error(self.lineno, ('pattern that uses fields defined in format ' | ||
210 | + 'cannot use format that uses fields defined ' | ||
211 | + 'in pattern')) | ||
212 | + if fmt_refs: | ||
213 | + # pattern fields first | ||
214 | + self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n) | ||
215 | + assert not extracted, "dangling fmt refs but it was already extracted" | ||
216 | if not extracted: | ||
217 | output(ind, self.base.extract_name(), | ||
218 | '(ctx, &u.f_', arg, ', insn);\n') | ||
219 | - self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n) | ||
220 | + if not fmt_refs: | ||
221 | + # pattern fields last | ||
222 | + self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n) | ||
223 | + | ||
224 | output(ind, 'if (', translate_prefix, '_', self.name, | ||
225 | '(ctx, &u.f_', arg, ')) return true;\n') | ||
226 | |||
227 | @@ -XXX,XX +XXX,XX @@ def output_code(self, i, extracted, outerbits, outermask): | ||
228 | ind = str_indent(i) | ||
229 | |||
230 | # If we identified all nodes below have the same format, | ||
231 | - # extract the fields now. | ||
232 | - if not extracted and self.base: | ||
233 | + # extract the fields now. But don't do it if the format relies | ||
234 | + # on named fields from the insn pattern, as those won't have | ||
235 | + # been initialised at this point. | ||
236 | + if not extracted and self.base and not self.base.dangling_references(): | ||
237 | output(ind, self.base.extract_name(), | ||
238 | '(ctx, &u.f_', self.base.base.name, ', insn);\n') | ||
239 | extracted = True | ||
240 | @@ -XXX,XX +XXX,XX @@ def parse_field(lineno, name, toks): | ||
241 | """Parse one instruction field from TOKS at LINENO""" | ||
242 | global fields | ||
243 | global insnwidth | ||
244 | + global re_C_ident | ||
245 | |||
246 | # A "simple" field will have only one entry; | ||
247 | # a "multifield" will have several. | ||
248 | @@ -XXX,XX +XXX,XX @@ def parse_field(lineno, name, toks): | ||
249 | func = func[1] | ||
250 | continue | ||
251 | |||
252 | + if re.fullmatch(re_C_ident + ':s[0-9]+', t): | ||
253 | + # Signed named field | ||
254 | + subtoks = t.split(':') | ||
255 | + n = subtoks[0] | ||
256 | + le = int(subtoks[1]) | ||
257 | + f = NamedField(n, True, le) | ||
258 | + subs.append(f) | ||
259 | + width += le | ||
260 | + continue | ||
261 | + if re.fullmatch(re_C_ident + ':[0-9]+', t): | ||
262 | + # Unsigned named field | ||
263 | + subtoks = t.split(':') | ||
264 | + n = subtoks[0] | ||
265 | + le = int(subtoks[1]) | ||
266 | + f = NamedField(n, False, le) | ||
267 | + subs.append(f) | ||
268 | + width += le | ||
269 | + continue | ||
270 | + | ||
271 | if re.fullmatch('[0-9]+:s[0-9]+', t): | ||
272 | # Signed field extract | ||
273 | subtoks = t.split(':s') | ||
274 | -- | ||
275 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Peter Maydell <peter.maydell@linaro.org> | ||
1 | 2 | ||
3 | Add some tests for various cases of named-field use, both ones that | ||
4 | should work and ones that should be diagnosed as errors. | ||
5 | |||
6 | Signed-off-by: Peter Maydell <peter.maydell@linaro.org> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-Id: <20230523120447.728365-7-peter.maydell@linaro.org> | ||
9 | --- | ||
10 | tests/decode/err_field10.decode | 7 +++++++ | ||
11 | tests/decode/err_field7.decode | 7 +++++++ | ||
12 | tests/decode/err_field8.decode | 8 ++++++++ | ||
13 | tests/decode/err_field9.decode | 14 ++++++++++++++ | ||
14 | tests/decode/succ_named_field.decode | 19 +++++++++++++++++++ | ||
15 | tests/decode/meson.build | 5 +++++ | ||
16 | 6 files changed, 60 insertions(+) | ||
17 | create mode 100644 tests/decode/err_field10.decode | ||
18 | create mode 100644 tests/decode/err_field7.decode | ||
19 | create mode 100644 tests/decode/err_field8.decode | ||
20 | create mode 100644 tests/decode/err_field9.decode | ||
21 | create mode 100644 tests/decode/succ_named_field.decode | ||
22 | |||
23 | diff --git a/tests/decode/err_field10.decode b/tests/decode/err_field10.decode | ||
24 | new file mode 100644 | ||
25 | index XXXXXXX..XXXXXXX | ||
26 | --- /dev/null | ||
27 | +++ b/tests/decode/err_field10.decode | ||
28 | @@ -XXX,XX +XXX,XX @@ | ||
29 | +# This work is licensed under the terms of the GNU LGPL, version 2 or later. | ||
30 | +# See the COPYING.LIB file in the top-level directory. | ||
31 | + | ||
32 | +# Diagnose formats which refer to undefined fields | ||
33 | +%field1 field2:3 | ||
34 | +@fmt ........ ........ ........ ........ %field1 | ||
35 | +insn 00000000 00000000 00000000 00000000 @fmt | ||
36 | diff --git a/tests/decode/err_field7.decode b/tests/decode/err_field7.decode | ||
37 | new file mode 100644 | ||
38 | index XXXXXXX..XXXXXXX | ||
39 | --- /dev/null | ||
40 | +++ b/tests/decode/err_field7.decode | ||
41 | @@ -XXX,XX +XXX,XX @@ | ||
42 | +# This work is licensed under the terms of the GNU LGPL, version 2 or later. | ||
43 | +# See the COPYING.LIB file in the top-level directory. | ||
44 | + | ||
45 | +# Diagnose fields whose definitions form a loop | ||
46 | +%field1 field2:3 | ||
47 | +%field2 field1:4 | ||
48 | +insn 00000000 00000000 00000000 00000000 %field1 %field2 | ||
49 | diff --git a/tests/decode/err_field8.decode b/tests/decode/err_field8.decode | ||
50 | new file mode 100644 | ||
51 | index XXXXXXX..XXXXXXX | ||
52 | --- /dev/null | ||
53 | +++ b/tests/decode/err_field8.decode | ||
54 | @@ -XXX,XX +XXX,XX @@ | ||
55 | +# This work is licensed under the terms of the GNU LGPL, version 2 or later. | ||
56 | +# See the COPYING.LIB file in the top-level directory. | ||
57 | + | ||
58 | +# Diagnose patterns which refer to undefined fields | ||
59 | +&f1 f1 a | ||
60 | +%field1 field2:3 | ||
61 | +@fmt ........ ........ ........ .... a:4 &f1 | ||
62 | +insn 00000000 00000000 00000000 0000 .... @fmt f1=%field1 | ||
63 | diff --git a/tests/decode/err_field9.decode b/tests/decode/err_field9.decode | ||
64 | new file mode 100644 | ||
65 | index XXXXXXX..XXXXXXX | ||
66 | --- /dev/null | ||
67 | +++ b/tests/decode/err_field9.decode | ||
68 | @@ -XXX,XX +XXX,XX @@ | ||
69 | +# This work is licensed under the terms of the GNU LGPL, version 2 or later. | ||
70 | +# See the COPYING.LIB file in the top-level directory. | ||
71 | + | ||
72 | +# Diagnose fields where the format refers to a field defined in the | ||
73 | +# pattern and the pattern refers to a field defined in the format. | ||
74 | +# This is theoretically not impossible to implement, but is not | ||
75 | +# supported by the script at this time. | ||
76 | +&abcd a b c d | ||
77 | +%refa a:3 | ||
78 | +%refc c:4 | ||
79 | +# Format defines 'c' and sets 'b' to an indirect ref to 'a' | ||
80 | +@fmt ........ ........ ........ c:8 &abcd b=%refa | ||
81 | +# Pattern defines 'a' and sets 'd' to an indirect ref to 'c' | ||
82 | +insn 00000000 00000000 00000000 ........ @fmt d=%refc a=6 | ||
83 | diff --git a/tests/decode/succ_named_field.decode b/tests/decode/succ_named_field.decode | ||
84 | new file mode 100644 | ||
85 | index XXXXXXX..XXXXXXX | ||
86 | --- /dev/null | ||
87 | +++ b/tests/decode/succ_named_field.decode | ||
88 | @@ -XXX,XX +XXX,XX @@ | ||
89 | +# This work is licensed under the terms of the GNU LGPL, version 2 or later. | ||
90 | +# See the COPYING.LIB file in the top-level directory. | ||
91 | + | ||
92 | +# field using a named_field | ||
93 | +%imm_sz 8:8 sz:3 | ||
94 | +insn 00000000 00000000 ........ 00000000 imm_sz=%imm_sz sz=1 | ||
95 | + | ||
96 | +# Ditto, via a format. Here a field in the format | ||
97 | +# references a named field defined in the insn pattern: | ||
98 | +&imm_a imm alpha | ||
99 | +%foo 0:16 alpha:4 | ||
100 | +@foo 00000001 ........ ........ ........ &imm_a imm=%foo | ||
101 | +i1 ........ 00000000 ........ ........ @foo alpha=1 | ||
102 | +i2 ........ 00000001 ........ ........ @foo alpha=2 | ||
103 | + | ||
104 | +# Here the named field is defined in the format and referenced | ||
105 | +# from the insn pattern: | ||
106 | +@bar 00000010 ........ ........ ........ &imm_a alpha=4 | ||
107 | +i3 ........ 00000000 ........ ........ @bar imm=%foo | ||
108 | diff --git a/tests/decode/meson.build b/tests/decode/meson.build | ||
109 | index XXXXXXX..XXXXXXX 100644 | ||
110 | --- a/tests/decode/meson.build | ||
111 | +++ b/tests/decode/meson.build | ||
112 | @@ -XXX,XX +XXX,XX @@ err_tests = [ | ||
113 | 'err_field4.decode', | ||
114 | 'err_field5.decode', | ||
115 | 'err_field6.decode', | ||
116 | + 'err_field7.decode', | ||
117 | + 'err_field8.decode', | ||
118 | + 'err_field9.decode', | ||
119 | + 'err_field10.decode', | ||
120 | 'err_init1.decode', | ||
121 | 'err_init2.decode', | ||
122 | 'err_init3.decode', | ||
123 | @@ -XXX,XX +XXX,XX @@ succ_tests = [ | ||
124 | 'succ_argset_type1.decode', | ||
125 | 'succ_function.decode', | ||
126 | 'succ_ident1.decode', | ||
127 | + 'succ_named_field.decode', | ||
128 | 'succ_pattern_group_nest1.decode', | ||
129 | 'succ_pattern_group_nest2.decode', | ||
130 | 'succ_pattern_group_nest3.decode', | ||
131 | -- | ||
132 | 2.34.1 | diff view generated by jsdifflib |