The following changes since commit 75d30fde55485b965a1168a21d016dd07b50ed32:

  Merge tag 'block-pull-request' of https://gitlab.com/stefanha/qemu into staging (2022-10-30 15:07:25 -0400)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20221031

for you to fetch changes up to cb375590983fc3d23600d02ba05a05d34fe44150:

  target/i386: Expand eflags updates inline (2022-10-31 11:39:10 +1100)

----------------------------------------------------------------
Remove sparc32plus support from tcg/sparc.
target/i386: Use cpu_unwind_state_data for tpr access.
target/i386: Expand eflags updates inline

----------------------------------------------------------------
Icenowy Zheng (1):
      tcg/tci: fix logic error when registering helpers via FFI

Richard Henderson (10):
      tcg/sparc: Remove support for sparc32plus
      tcg/sparc64: Rename from tcg/sparc
      tcg/sparc64: Remove sparc32plus constraints
      accel/tcg: Introduce cpu_unwind_state_data
      target/i386: Use cpu_unwind_state_data for tpr access
      target/openrisc: Always exit after mtspr npc
      target/openrisc: Use cpu_unwind_state_data for mfspr
      accel/tcg: Remove will_exit argument from cpu_restore_state
      accel/tcg: Remove reset_icount argument from cpu_restore_state_from_tb
      target/i386: Expand eflags updates inline

 meson.build | 4 +-
 accel/tcg/internal.h | 4 +-
 include/exec/exec-all.h | 24 ++-
 target/i386/helper.h | 5 -
 tcg/{sparc => sparc64}/tcg-target-con-set.h | 16 +-
 tcg/{sparc => sparc64}/tcg-target-con-str.h | 3 -
 tcg/{sparc => sparc64}/tcg-target.h | 11 --
 accel/tcg/cpu-exec-common.c | 2 +-
 accel/tcg/tb-maint.c | 4 +-
 accel/tcg/translate-all.c | 91 +++++----
 target/alpha/helper.c | 2 +-
 target/alpha/mem_helper.c | 2 +-
 target/arm/op_helper.c | 2 +-
 target/arm/tlb_helper.c | 8 +-
 target/cris/helper.c | 2 +-
 target/i386/helper.c | 21 ++-
 target/i386/tcg/cc_helper.c | 41 -----
 target/i386/tcg/sysemu/svm_helper.c | 2 +-
 target/i386/tcg/translate.c | 30 ++-
 target/m68k/op_helper.c | 4 +-
 target/microblaze/helper.c | 2 +-
 target/nios2/op_helper.c | 2 +-
 target/openrisc/sys_helper.c | 17 +-
 target/ppc/excp_helper.c | 2 +-
 target/s390x/tcg/excp_helper.c | 2 +-
 target/tricore/op_helper.c | 2 +-
 target/xtensa/helper.c | 6 +-
 tcg/tcg.c | 81 +-------
 tcg/{sparc => sparc64}/tcg-target.c.inc | 275 ++++++++--------------------
 MAINTAINERS | 2 +-
 30 files changed, 232 insertions(+), 437 deletions(-)
 rename tcg/{sparc => sparc64}/tcg-target-con-set.h (69%)
 rename tcg/{sparc => sparc64}/tcg-target-con-str.h (77%)
 rename tcg/{sparc => sparc64}/tcg-target.h (95%)
 rename tcg/{sparc => sparc64}/tcg-target.c.inc (91%)

The following changes since commit 7fe6cb68117ac856e03c93d18aca09de015392b0:

  Merge tag 'pull-target-arm-20230530-1' of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2023-05-30 08:02:05 -0700)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230530

for you to fetch changes up to 276d77de503e8f5f5cbd3f7d94302ca12d1d982e:

  tests/decode: Add tests for various named-field cases (2023-05-30 10:55:39 -0700)

----------------------------------------------------------------
Improvements to 128-bit atomics:
  - Separate __int128_t type and arithmetic detection
  - Support 128-bit load/store in backend for i386, aarch64, ppc64, s390x
  - Accelerate atomics via host/include/
Decodetree:
  - Add named field syntax
  - Move tests to meson

----------------------------------------------------------------
Peter Maydell (5):
      docs: Document decodetree named field syntax
      scripts/decodetree: Pass lvalue-formatter function to str_extract()
      scripts/decodetree: Implement a topological sort
      scripts/decodetree: Implement named field support
      tests/decode: Add tests for various named-field cases

Richard Henderson (22):
      tcg: Fix register move type in tcg_out_ld_helper_ret
      accel/tcg: Fix check for page writeability in load_atomic16_or_exit
      meson: Split test for __int128_t type from __int128_t arithmetic
      qemu/atomic128: Add x86_64 atomic128-ldst.h
      tcg/i386: Support 128-bit load/store
      tcg/aarch64: Rename temporaries
      tcg/aarch64: Reserve TCG_REG_TMP1, TCG_REG_TMP2
      tcg/aarch64: Simplify constraints on qemu_ld/st
      tcg/aarch64: Support 128-bit load/store
      tcg/ppc: Support 128-bit load/store
      tcg/s390x: Support 128-bit load/store
      accel/tcg: Extract load_atom_extract_al16_or_al8 to host header
      accel/tcg: Extract store_atom_insert_al16 to host header
      accel/tcg: Add x86_64 load_atom_extract_al16_or_al8
      accel/tcg: Add aarch64 lse2 load_atom_extract_al16_or_al8
      accel/tcg: Add aarch64 store_atom_insert_al16
      tcg: Remove TCG_TARGET_TLB_DISPLACEMENT_BITS
      decodetree: Add --test-for-error
      decodetree: Fix recursion in prop_format and build_tree
      decodetree: Diagnose empty pattern group
      decodetree: Do not remove output_file from /dev
      tests/decode: Convert tests to meson

 docs/devel/decodetree.rst | 33 ++-
 meson.build | 15 +-
 host/include/aarch64/host/load-extract-al16-al8.h | 40 ++++
 host/include/aarch64/host/store-insert-al16.h | 47 ++++
 host/include/generic/host/load-extract-al16-al8.h | 45 ++++
 host/include/generic/host/store-insert-al16.h | 50 ++++
 host/include/x86_64/host/atomic128-ldst.h | 68 ++++++
 host/include/x86_64/host/load-extract-al16-al8.h | 50 ++++
 include/qemu/int128.h | 4 +-
 tcg/aarch64/tcg-target-con-set.h | 4 +-
 tcg/aarch64/tcg-target-con-str.h | 1 -
 tcg/aarch64/tcg-target.h | 12 +-
 tcg/arm/tcg-target.h | 1 -
 tcg/i386/tcg-target.h | 5 +-
 tcg/mips/tcg-target.h | 1 -
 tcg/ppc/tcg-target-con-set.h | 2 +
 tcg/ppc/tcg-target-con-str.h | 1 +
 tcg/ppc/tcg-target.h | 4 +-
 tcg/riscv/tcg-target.h | 1 -
 tcg/s390x/tcg-target-con-set.h | 2 +
 tcg/s390x/tcg-target.h | 3 +-
 tcg/sparc64/tcg-target.h | 1 -
 tcg/tci/tcg-target.h | 1 -
 tests/decode/err_field10.decode | 7 +
 tests/decode/err_field7.decode | 7 +
 tests/decode/err_field8.decode | 8 +
 tests/decode/err_field9.decode | 14 ++
 tests/decode/succ_named_field.decode | 19 ++
 tcg/tcg.c | 4 +-
 accel/tcg/ldst_atomicity.c.inc | 80 +------
 tcg/aarch64/tcg-target.c.inc | 243 +++++++++++++++-----
 tcg/i386/tcg-target.c.inc | 191 +++++++++++++++-
 tcg/ppc/tcg-target.c.inc | 108 ++++++++-
 tcg/s390x/tcg-target.c.inc | 107 ++++++++-
 scripts/decodetree.py | 265 ++++++++++++++++++++--
 tests/decode/check.sh | 24 --
 tests/decode/meson.build | 64 ++++++
 tests/meson.build | 5 +-
 38 files changed, 1312 insertions(+), 225 deletions(-)
 create mode 100644 host/include/aarch64/host/load-extract-al16-al8.h
 create mode 100644 host/include/aarch64/host/store-insert-al16.h
 create mode 100644 host/include/generic/host/load-extract-al16-al8.h
 create mode 100644 host/include/generic/host/store-insert-al16.h
 create mode 100644 host/include/x86_64/host/atomic128-ldst.h
 create mode 100644 host/include/x86_64/host/load-extract-al16-al8.h
 create mode 100644 tests/decode/err_field10.decode
 create mode 100644 tests/decode/err_field7.decode
 create mode 100644 tests/decode/err_field8.decode
 create mode 100644 tests/decode/err_field9.decode
 create mode 100644 tests/decode/succ_named_field.decode
 delete mode 100755 tests/decode/check.sh
 create mode 100644 tests/decode/meson.build
From: Icenowy Zheng <uwu@icenowy.me>

When registering helpers via FFI for TCI, the inner loop that iterates
parameters of the helper reuses (and thus pollutes) the same variable
used by the outer loop that iterates all helpers, thus made some helpers
unregistered.

Fix this logic error by using a dedicated temporary variable for the
inner loop.

Fixes: 22f15579fa ("tcg: Build ffi data structures for helpers")
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Icenowy Zheng <uwu@icenowy.me>
Message-Id: <20221028072145.1593205-1-uwu@icenowy.me>
[rth: Move declaration of j to the for loop itself]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static void tcg_context_init(unsigned max_cpus)
 
         if (nargs != 0) {
             ca->cif.arg_types = ca->args;
-            for (i = 0; i < nargs; ++i) {
-                int typecode = extract32(typemask, (i + 1) * 3, 3);
-                ca->args[i] = typecode_to_ffi[typecode];
+            for (int j = 0; j < nargs; ++j) {
+                int typecode = extract32(typemask, (j + 1) * 3, 3);
+                ca->args[j] = typecode_to_ffi[typecode];
             }
         }
 
-- 
2.34.1

The first move was incorrectly using TCG_TYPE_I32 while the second
move was correctly using TCG_TYPE_REG.  This prevents a 64-bit host
from moving all 128-bits of the return value.

Fixes: ebebea53ef8 ("tcg: Support TCG_TYPE_I128 in tcg_out_{ld,st}_helper_{args,ret}")
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
---
 tcg/tcg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
     mov[0].dst = ldst->datalo_reg;
     mov[0].src =
         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
-    mov[0].dst_type = TCG_TYPE_I32;
-    mov[0].src_type = TCG_TYPE_I32;
+    mov[0].dst_type = TCG_TYPE_REG;
+    mov[0].src_type = TCG_TYPE_REG;
     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
 
     mov[1].dst = ldst->datahi_reg;
-- 
2.34.1
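The logic error described in the first commit message above is an inner loop reusing the outer loop's index. A minimal standalone C sketch of the failure mode and of the fix (illustrative only; the names and counts below are invented, not taken from tcg/tcg.c):

#include <stdio.h>

enum { N_HELPERS = 4, N_ARGS = 6 };

int main(void)
{
    int registered = 0;
    int i;

    /* Buggy pattern: the inner loop clobbers 'i', so the outer loop's
     * condition fails after the first helper and the rest are skipped. */
    for (i = 0; i < N_HELPERS; ++i) {
        for (i = 0; i < N_ARGS; ++i) {
            /* fill per-argument FFI data ... */
        }
        registered++;
    }
    printf("buggy: %d of %d helpers registered\n", registered, N_HELPERS);

    /* Fixed pattern, as in the patch: a dedicated inner index. */
    registered = 0;
    for (i = 0; i < N_HELPERS; ++i) {
        for (int j = 0; j < N_ARGS; ++j) {
            /* fill per-argument FFI data ... */
        }
        registered++;
    }
    printf("fixed: %d of %d helpers registered\n", registered, N_HELPERS);
    return 0;
}

With the buggy loop only one helper is registered; with the dedicated index all four are.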
New patch
PAGE_WRITE is current writability, as modified by TB protection;
PAGE_WRITE_ORG is the original page writability.

Fixes: cdfac37be0d ("accel/tcg: Honor atomicity of loads")
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/ldst_atomicity.c.inc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/ldst_atomicity.c.inc
+++ b/accel/tcg/ldst_atomicity.c.inc
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atomic8_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
      * another process, because the fallback start_exclusive solution
      * provides no protection across processes.
      */
-    if (!page_check_range(h2g(pv), 8, PAGE_WRITE)) {
+    if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) {
         uint64_t *p = __builtin_assume_aligned(pv, 8);
         return *p;
     }
@@ -XXX,XX +XXX,XX @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
      * another process, because the fallback start_exclusive solution
      * provides no protection across processes.
      */
-    if (!page_check_range(h2g(p), 16, PAGE_WRITE)) {
+    if (!page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) {
         return *p;
     }
 #endif
-- 
2.34.1
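The one-line change above turns on the difference between the two page flags. A hedged restatement in code (the helper below is invented for illustration; page_check_range(), h2g() and the PAGE_* flags are the user-mode APIs that appear in the diff):

/*
 * PAGE_WRITE is the current protection, which accel/tcg clears
 * temporarily while a page contains translated code; PAGE_WRITE_ORG is
 * the protection the guest originally mapped.  Whether a non-atomic
 * read is safe is a question about whether the page can ever be
 * written again, i.e. about PAGE_WRITE_ORG.
 */
static bool page_is_effectively_readonly(void *host_ptr, int len)
{
    return !page_check_range(h2g(host_ptr), len, PAGE_WRITE_ORG);
}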
1
Emphasize that we only support full 64-bit code generation.
1
Older versions of clang have missing runtime functions for arithmetic
2
with -fsanitize=undefined (see 464e3671f9d5c), so we cannot use
3
__int128_t for implementing Int128. But __int128_t is present,
4
data movement works, and it can be used for atomic128.
5
6
Probe for both CONFIG_INT128_TYPE and CONFIG_INT128, adjust
7
qemu/int128.h to define Int128Alias if CONFIG_INT128_TYPE,
8
and adjust the meson probe for atomics to use has_int128_type.
2
9
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
12
---
7
meson.build | 4 +---
13
meson.build | 15 ++++++++++-----
8
tcg/{sparc => sparc64}/tcg-target-con-set.h | 0
14
include/qemu/int128.h | 4 ++--
9
tcg/{sparc => sparc64}/tcg-target-con-str.h | 0
15
2 files changed, 12 insertions(+), 7 deletions(-)
10
tcg/{sparc => sparc64}/tcg-target.h | 0
11
tcg/{sparc => sparc64}/tcg-target.c.inc | 0
12
MAINTAINERS | 2 +-
13
6 files changed, 2 insertions(+), 4 deletions(-)
14
rename tcg/{sparc => sparc64}/tcg-target-con-set.h (100%)
15
rename tcg/{sparc => sparc64}/tcg-target-con-str.h (100%)
16
rename tcg/{sparc => sparc64}/tcg-target.h (100%)
17
rename tcg/{sparc => sparc64}/tcg-target.c.inc (100%)
18
16
19
diff --git a/meson.build b/meson.build
17
diff --git a/meson.build b/meson.build
20
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
21
--- a/meson.build
19
--- a/meson.build
22
+++ b/meson.build
20
+++ b/meson.build
23
@@ -XXX,XX +XXX,XX @@ qapi_trace_events = []
21
@@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_ATOMIC64', cc.links('''
24
bsd_oses = ['gnu/kfreebsd', 'freebsd', 'netbsd', 'openbsd', 'dragonfly', 'darwin']
22
return 0;
25
supported_oses = ['windows', 'freebsd', 'netbsd', 'openbsd', 'darwin', 'sunos', 'linux']
23
}'''))
26
supported_cpus = ['ppc', 'ppc64', 's390x', 'riscv', 'x86', 'x86_64',
24
27
- 'arm', 'aarch64', 'loongarch64', 'mips', 'mips64', 'sparc', 'sparc64']
25
-has_int128 = cc.links('''
28
+ 'arm', 'aarch64', 'loongarch64', 'mips', 'mips64', 'sparc64']
26
+has_int128_type = cc.compiles('''
29
27
+ __int128_t a;
30
cpu = host_machine.cpu_family()
28
+ __uint128_t b;
31
29
+ int main(void) { b = a; }''')
32
@@ -XXX,XX +XXX,XX @@ if get_option('tcg').allowed()
30
+config_host_data.set('CONFIG_INT128_TYPE', has_int128_type)
33
endif
31
+
34
if get_option('tcg_interpreter')
32
+has_int128 = has_int128_type and cc.links('''
35
tcg_arch = 'tci'
33
__int128_t a;
36
- elif host_arch == 'sparc64'
34
__uint128_t b;
37
- tcg_arch = 'sparc'
35
int main (void) {
38
elif host_arch == 'x86_64'
36
@@ -XXX,XX +XXX,XX @@ has_int128 = cc.links('''
39
tcg_arch = 'i386'
37
a = a * a;
40
elif host_arch == 'ppc64'
38
return 0;
41
diff --git a/tcg/sparc/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h
39
}''')
42
similarity index 100%
40
-
43
rename from tcg/sparc/tcg-target-con-set.h
41
config_host_data.set('CONFIG_INT128', has_int128)
44
rename to tcg/sparc64/tcg-target-con-set.h
42
45
diff --git a/tcg/sparc/tcg-target-con-str.h b/tcg/sparc64/tcg-target-con-str.h
43
-if has_int128
46
similarity index 100%
44
+if has_int128_type
47
rename from tcg/sparc/tcg-target-con-str.h
45
# "do we have 128-bit atomics which are handled inline and specifically not
48
rename to tcg/sparc64/tcg-target-con-str.h
46
# via libatomic". The reason we can't use libatomic is documented in the
49
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc64/tcg-target.h
47
# comment starting "GCC is a house divided" in include/qemu/atomic128.h.
50
similarity index 100%
48
@@ -XXX,XX +XXX,XX @@ if has_int128
51
rename from tcg/sparc/tcg-target.h
49
# __alignof(unsigned __int128) for the host.
52
rename to tcg/sparc64/tcg-target.h
50
atomic_test_128 = '''
53
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
51
int main(int ac, char **av) {
54
similarity index 100%
52
- unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], 16);
55
rename from tcg/sparc/tcg-target.c.inc
53
+ __uint128_t *p = __builtin_assume_aligned(av[ac - 1], 16);
56
rename to tcg/sparc64/tcg-target.c.inc
54
p[1] = __atomic_load_n(&p[0], __ATOMIC_RELAXED);
57
diff --git a/MAINTAINERS b/MAINTAINERS
55
__atomic_store_n(&p[2], p[3], __ATOMIC_RELAXED);
56
__atomic_compare_exchange_n(&p[4], &p[5], p[6], 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
57
@@ -XXX,XX +XXX,XX @@ if has_int128
58
config_host_data.set('CONFIG_CMPXCHG128', cc.links('''
59
int main(void)
60
{
61
- unsigned __int128 x = 0, y = 0;
62
+ __uint128_t x = 0, y = 0;
63
__sync_val_compare_and_swap_16(&x, y, x);
64
return 0;
65
}
66
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
58
index XXXXXXX..XXXXXXX 100644
67
index XXXXXXX..XXXXXXX 100644
59
--- a/MAINTAINERS
68
--- a/include/qemu/int128.h
60
+++ b/MAINTAINERS
69
+++ b/include/qemu/int128.h
61
@@ -XXX,XX +XXX,XX @@ L: qemu-s390x@nongnu.org
70
@@ -XXX,XX +XXX,XX @@ static inline void bswap128s(Int128 *s)
62
71
* a possible structure and the native types. Ease parameter passing
63
SPARC TCG target
72
* via use of the transparent union extension.
64
S: Odd Fixes
73
*/
65
-F: tcg/sparc/
74
-#ifdef CONFIG_INT128
66
+F: tcg/sparc64/
75
+#ifdef CONFIG_INT128_TYPE
67
F: disas/sparc.c
76
typedef union {
68
77
__uint128_t u;
69
TCI TCG target
78
__int128_t i;
79
@@ -XXX,XX +XXX,XX @@ typedef union {
80
} Int128Alias __attribute__((transparent_union));
81
#else
82
typedef Int128 Int128Alias;
83
-#endif /* CONFIG_INT128 */
84
+#endif /* CONFIG_INT128_TYPE */
85
86
#endif /* INT128_H */
70
--
87
--
71
2.34.1
88
2.34.1
72
73
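For reference, the two probes that this meson change distinguishes can be read as standalone C programs (reconstructed from the diff above; treat them as a sketch of the configure logic rather than a copy of meson.build):

/* Probe for CONFIG_INT128_TYPE: the __int128_t type exists and data
 * movement works.  Only compilation is required. */
__int128_t a;
__uint128_t b;
int main(void) { b = a; return 0; }

/* Probe for CONFIG_INT128 (a separate program): arithmetic must also
 * link, which older clang with -fsanitize=undefined cannot do because
 * the runtime support functions are missing (see 464e3671f9d5c). */
__int128_t a;
__uint128_t b;
int main(void)
{
    a = a * a;
    return 0;
}

Int128Alias and the atomic128 probes then key off CONFIG_INT128_TYPE / has_int128_type, since they only need the type and data movement, not full arithmetic.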
1
Since 9b9c37c36439, we have only supported sparc64 cpus.
1
With CPUINFO_ATOMIC_VMOVDQA, we can perform proper atomic
2
Debian and Gentoo now only support 64-bit sparc64 userland,
2
load/store without cmpxchg16b.
3
so it is time to drop the 32-bit sparc64 userland: sparc32plus.
4
3
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
6
---
9
tcg/sparc/tcg-target.h | 11 ---
7
host/include/x86_64/host/atomic128-ldst.h | 68 +++++++++++++++++++++++
10
tcg/tcg.c | 75 +----------------
8
1 file changed, 68 insertions(+)
11
tcg/sparc/tcg-target.c.inc | 166 +++++++------------------------------
9
create mode 100644 host/include/x86_64/host/atomic128-ldst.h
12
3 files changed, 33 insertions(+), 219 deletions(-)
13
10
14
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
11
diff --git a/host/include/x86_64/host/atomic128-ldst.h b/host/include/x86_64/host/atomic128-ldst.h
15
index XXXXXXX..XXXXXXX 100644
12
new file mode 100644
16
--- a/tcg/sparc/tcg-target.h
13
index XXXXXXX..XXXXXXX
17
+++ b/tcg/sparc/tcg-target.h
14
--- /dev/null
15
+++ b/host/include/x86_64/host/atomic128-ldst.h
18
@@ -XXX,XX +XXX,XX @@
16
@@ -XXX,XX +XXX,XX @@
19
#ifndef SPARC_TCG_TARGET_H
17
+/*
20
#define SPARC_TCG_TARGET_H
18
+ * SPDX-License-Identifier: GPL-2.0-or-later
21
19
+ * Load/store for 128-bit atomic operations, x86_64 version.
22
-#define TCG_TARGET_REG_BITS 64
20
+ *
23
-
21
+ * Copyright (C) 2023 Linaro, Ltd.
24
#define TCG_TARGET_INSN_UNIT_SIZE 4
22
+ *
25
#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
23
+ * See docs/devel/atomics.rst for discussion about the guarantees each
26
#define TCG_TARGET_NB_REGS 32
24
+ * atomic primitive is meant to provide.
27
@@ -XXX,XX +XXX,XX @@ typedef enum {
25
+ */
28
/* used for function call generation */
26
+
29
#define TCG_REG_CALL_STACK TCG_REG_O6
27
+#ifndef AARCH64_ATOMIC128_LDST_H
30
28
+#define AARCH64_ATOMIC128_LDST_H
31
-#ifdef __arch64__
29
+
32
#define TCG_TARGET_STACK_BIAS 2047
30
+#ifdef CONFIG_INT128_TYPE
33
#define TCG_TARGET_STACK_ALIGN 16
31
+#include "host/cpuinfo.h"
34
#define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS)
32
+#include "tcg/debug-assert.h"
35
-#else
33
+
36
-#define TCG_TARGET_STACK_BIAS 0
34
+/*
37
-#define TCG_TARGET_STACK_ALIGN 8
35
+ * Through clang 16, with -mcx16, __atomic_load_n is incorrectly
38
-#define TCG_TARGET_CALL_STACK_OFFSET (64 + 4 + 6*4)
36
+ * expanded to a read-write operation: lock cmpxchg16b.
39
-#endif
37
+ */
40
-
38
+
41
-#ifdef __arch64__
39
+#define HAVE_ATOMIC128_RO likely(cpuinfo & CPUINFO_ATOMIC_VMOVDQA)
42
#define TCG_TARGET_EXTEND_ARGS 1
40
+#define HAVE_ATOMIC128_RW 1
43
-#endif
41
+
44
42
+static inline Int128 atomic16_read_ro(const Int128 *ptr)
45
#if defined(__VIS__) && __VIS__ >= 0x300
43
+{
46
#define use_vis3_instructions 1
44
+ Int128Alias r;
47
diff --git a/tcg/tcg.c b/tcg/tcg.c
45
+
48
index XXXXXXX..XXXXXXX 100644
46
+ tcg_debug_assert(HAVE_ATOMIC128_RO);
49
--- a/tcg/tcg.c
47
+ asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr));
50
+++ b/tcg/tcg.c
48
+
51
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
49
+ return r.s;
52
}
50
+}
53
#endif
51
+
54
52
+static inline Int128 atomic16_read_rw(Int128 *ptr)
55
-#if defined(__sparc__) && !defined(__arch64__) \
53
+{
56
- && !defined(CONFIG_TCG_INTERPRETER)
54
+ __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
57
- /* We have 64-bit values in one register, but need to pass as two
55
+ Int128Alias r;
58
- separate parameters. Split them. */
56
+
59
- int orig_typemask = typemask;
57
+ if (HAVE_ATOMIC128_RO) {
60
- int orig_nargs = nargs;
58
+ asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr_align));
61
- TCGv_i64 retl, reth;
59
+ } else {
62
- TCGTemp *split_args[MAX_OPC_PARAM];
60
+ r.i = __sync_val_compare_and_swap_16(ptr_align, 0, 0);
63
-
61
+ }
64
- retl = NULL;
62
+ return r.s;
65
- reth = NULL;
63
+}
66
- typemask = 0;
64
+
67
- for (i = real_args = 0; i < nargs; ++i) {
65
+static inline void atomic16_set(Int128 *ptr, Int128 val)
68
- int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
66
+{
69
- bool is_64bit = (argtype & ~1) == dh_typecode_i64;
67
+ __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
70
-
68
+ Int128Alias new = { .s = val };
71
- if (is_64bit) {
69
+
72
- TCGv_i64 orig = temp_tcgv_i64(args[i]);
70
+ if (HAVE_ATOMIC128_RO) {
73
- TCGv_i32 h = tcg_temp_new_i32();
71
+ asm("vmovdqa %1, %0" : "=m"(*ptr_align) : "x" (new.i));
74
- TCGv_i32 l = tcg_temp_new_i32();
72
+ } else {
75
- tcg_gen_extr_i64_i32(l, h, orig);
73
+ __int128_t old;
76
- split_args[real_args++] = tcgv_i32_temp(h);
74
+ do {
77
- typemask |= dh_typecode_i32 << (real_args * 3);
75
+ old = *ptr_align;
78
- split_args[real_args++] = tcgv_i32_temp(l);
76
+ } while (!__sync_bool_compare_and_swap_16(ptr_align, old, new.i));
79
- typemask |= dh_typecode_i32 << (real_args * 3);
77
+ }
80
- } else {
78
+}
81
- split_args[real_args++] = args[i];
79
+#else
82
- typemask |= argtype << (real_args * 3);
80
+/* Provide QEMU_ERROR stubs. */
83
- }
81
+#include "host/include/generic/host/atomic128-ldst.h"
84
- }
85
- nargs = real_args;
86
- args = split_args;
87
-#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
88
+#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
89
for (i = 0; i < nargs; ++i) {
90
int argtype = extract32(typemask, (i + 1) * 3, 3);
91
bool is_32bit = (argtype & ~1) == dh_typecode_i32;
92
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
93
94
pi = 0;
95
if (ret != NULL) {
96
-#if defined(__sparc__) && !defined(__arch64__) \
97
- && !defined(CONFIG_TCG_INTERPRETER)
98
- if ((typemask & 6) == dh_typecode_i64) {
99
- /* The 32-bit ABI is going to return the 64-bit value in
100
- the %o0/%o1 register pair. Prepare for this by using
101
- two return temporaries, and reassemble below. */
102
- retl = tcg_temp_new_i64();
103
- reth = tcg_temp_new_i64();
104
- op->args[pi++] = tcgv_i64_arg(reth);
105
- op->args[pi++] = tcgv_i64_arg(retl);
106
- nb_rets = 2;
107
- } else {
108
- op->args[pi++] = temp_arg(ret);
109
- nb_rets = 1;
110
- }
111
-#else
112
if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
113
#if HOST_BIG_ENDIAN
114
op->args[pi++] = temp_arg(ret + 1);
115
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
116
op->args[pi++] = temp_arg(ret);
117
nb_rets = 1;
118
}
119
-#endif
120
} else {
121
nb_rets = 0;
122
}
123
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
124
tcg_debug_assert(TCGOP_CALLI(op) == real_args);
125
tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
126
127
-#if defined(__sparc__) && !defined(__arch64__) \
128
- && !defined(CONFIG_TCG_INTERPRETER)
129
- /* Free all of the parts we allocated above. */
130
- for (i = real_args = 0; i < orig_nargs; ++i) {
131
- int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
132
- bool is_64bit = (argtype & ~1) == dh_typecode_i64;
133
-
134
- if (is_64bit) {
135
- tcg_temp_free_internal(args[real_args++]);
136
- tcg_temp_free_internal(args[real_args++]);
137
- } else {
138
- real_args++;
139
- }
140
- }
141
- if ((orig_typemask & 6) == dh_typecode_i64) {
142
- /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
143
- Note that describing these as TCGv_i64 eliminates an unnecessary
144
- zero-extension that tcg_gen_concat_i32_i64 would create. */
145
- tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
146
- tcg_temp_free_i64(retl);
147
- tcg_temp_free_i64(reth);
148
- }
149
-#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
150
+#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
151
for (i = 0; i < nargs; ++i) {
152
int argtype = extract32(typemask, (i + 1) * 3, 3);
153
bool is_32bit = (argtype & ~1) == dh_typecode_i32;
154
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
155
index XXXXXXX..XXXXXXX 100644
156
--- a/tcg/sparc/tcg-target.c.inc
157
+++ b/tcg/sparc/tcg-target.c.inc
158
@@ -XXX,XX +XXX,XX @@
159
* THE SOFTWARE.
160
*/
161
162
+/* We only support generating code for 64-bit mode. */
163
+#ifndef __arch64__
164
+#error "unsupported code generation mode"
165
+#endif
82
+#endif
166
+
83
+
167
#include "../tcg-pool.c.inc"
84
+#endif /* AARCH64_ATOMIC128_LDST_H */
168
169
#ifdef CONFIG_DEBUG_TCG
170
@@ -XXX,XX +XXX,XX @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
171
};
172
#endif
173
174
-#ifdef __arch64__
175
-# define SPARC64 1
176
-#else
177
-# define SPARC64 0
178
-#endif
179
-
180
#define TCG_CT_CONST_S11 0x100
181
#define TCG_CT_CONST_S13 0x200
182
#define TCG_CT_CONST_ZERO 0x400
183
@@ -XXX,XX +XXX,XX @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
184
* high bits of the %i and %l registers garbage at all times.
185
*/
186
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
187
-#if SPARC64
188
# define ALL_GENERAL_REGS64 ALL_GENERAL_REGS
189
-#else
190
-# define ALL_GENERAL_REGS64 MAKE_64BIT_MASK(0, 16)
191
-#endif
192
#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
193
#define ALL_QLDST_REGS64 (ALL_GENERAL_REGS64 & ~SOFTMMU_RESERVE_REGS)
194
195
@@ -XXX,XX +XXX,XX @@ static bool check_fit_i32(int32_t val, unsigned int bits)
196
}
197
198
#define check_fit_tl check_fit_i64
199
-#if SPARC64
200
-# define check_fit_ptr check_fit_i64
201
-#else
202
-# define check_fit_ptr check_fit_i32
203
-#endif
204
+#define check_fit_ptr check_fit_i64
205
206
static bool patch_reloc(tcg_insn_unit *src_rw, int type,
207
intptr_t value, intptr_t addend)
208
@@ -XXX,XX +XXX,XX @@ static void tcg_out_sety(TCGContext *s, TCGReg rs)
209
tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs));
210
}
211
212
-static void tcg_out_rdy(TCGContext *s, TCGReg rd)
213
-{
214
- tcg_out32(s, RDY | INSN_RD(rd));
215
-}
216
-
217
static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1,
218
int32_t val2, int val2const, int uns)
219
{
220
@@ -XXX,XX +XXX,XX @@ static void emit_extend(TCGContext *s, TCGReg r, int op)
221
tcg_out_arithi(s, r, r, 16, SHIFT_SRL);
222
break;
223
case MO_32:
224
- if (SPARC64) {
225
- tcg_out_arith(s, r, r, 0, SHIFT_SRL);
226
- }
227
+ tcg_out_arith(s, r, r, 0, SHIFT_SRL);
228
break;
229
case MO_64:
230
break;
231
@@ -XXX,XX +XXX,XX @@ static void build_trampolines(TCGContext *s)
232
};
233
234
int i;
235
- TCGReg ra;
236
237
for (i = 0; i < ARRAY_SIZE(qemu_ld_helpers); ++i) {
238
if (qemu_ld_helpers[i] == NULL) {
239
@@ -XXX,XX +XXX,XX @@ static void build_trampolines(TCGContext *s)
240
}
241
qemu_ld_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
242
243
- if (SPARC64 || TARGET_LONG_BITS == 32) {
244
- ra = TCG_REG_O3;
245
- } else {
246
- /* Install the high part of the address. */
247
- tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX);
248
- ra = TCG_REG_O4;
249
- }
250
-
251
/* Set the retaddr operand. */
252
- tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7);
253
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O3, TCG_REG_O7);
254
/* Tail call. */
255
tcg_out_jmpl_const(s, qemu_ld_helpers[i], true, true);
256
/* delay slot -- set the env argument */
257
@@ -XXX,XX +XXX,XX @@ static void build_trampolines(TCGContext *s)
258
}
259
qemu_st_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
260
261
- if (SPARC64) {
262
- emit_extend(s, TCG_REG_O2, i);
263
- ra = TCG_REG_O4;
264
- } else {
265
- ra = TCG_REG_O1;
266
- if (TARGET_LONG_BITS == 64) {
267
- /* Install the high part of the address. */
268
- tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
269
- ra += 2;
270
- } else {
271
- ra += 1;
272
- }
273
- if ((i & MO_SIZE) == MO_64) {
274
- /* Install the high part of the data. */
275
- tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
276
- ra += 2;
277
- } else {
278
- emit_extend(s, ra, i);
279
- ra += 1;
280
- }
281
- /* Skip the oi argument. */
282
- ra += 1;
283
- }
284
-
285
+ emit_extend(s, TCG_REG_O2, i);
286
+
287
/* Set the retaddr operand. */
288
- if (ra >= TCG_REG_O6) {
289
- tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK,
290
- TCG_TARGET_CALL_STACK_OFFSET);
291
- } else {
292
- tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7);
293
- }
294
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O4, TCG_REG_O7);
295
296
/* Tail call. */
297
tcg_out_jmpl_const(s, qemu_st_helpers[i], true, true);
298
@@ -XXX,XX +XXX,XX @@ static void build_trampolines(TCGContext *s)
299
qemu_unalign_st_trampoline = tcg_splitwx_to_rx(s->code_ptr);
300
}
301
302
- if (!SPARC64 && TARGET_LONG_BITS == 64) {
303
- /* Install the high part of the address. */
304
- tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX);
305
- }
306
-
307
/* Tail call. */
308
tcg_out_jmpl_const(s, helper, true, true);
309
/* delay slot -- set the env argument */
310
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
311
tcg_out_cmp(s, r0, r2, 0);
312
313
/* If the guest address must be zero-extended, do so now. */
314
- if (SPARC64 && TARGET_LONG_BITS == 32) {
315
+ if (TARGET_LONG_BITS == 32) {
316
tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL);
317
return r0;
318
}
319
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
320
321
#ifdef CONFIG_SOFTMMU
322
unsigned memi = get_mmuidx(oi);
323
- TCGReg addrz, param;
324
+ TCGReg addrz;
325
const tcg_insn_unit *func;
326
327
addrz = tcg_out_tlb_load(s, addr, memi, memop,
328
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
329
330
/* TLB Miss. */
331
332
- param = TCG_REG_O1;
333
- if (!SPARC64 && TARGET_LONG_BITS == 64) {
334
- /* Skip the high-part; we'll perform the extract in the trampoline. */
335
- param++;
336
- }
337
- tcg_out_mov(s, TCG_TYPE_REG, param++, addrz);
338
+ tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz);
339
340
/* We use the helpers to extend SB and SW data, leaving the case
341
of SL needing explicit extending below. */
342
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
343
tcg_debug_assert(func != NULL);
344
tcg_out_call_nodelay(s, func, false);
345
/* delay slot */
346
- tcg_out_movi(s, TCG_TYPE_I32, param, oi);
347
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O2, oi);
348
349
- /* Recall that all of the helpers return 64-bit results.
350
- Which complicates things for sparcv8plus. */
351
- if (SPARC64) {
352
- /* We let the helper sign-extend SB and SW, but leave SL for here. */
353
- if (is_64 && (memop & MO_SSIZE) == MO_SL) {
354
- tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA);
355
- } else {
356
- tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
357
- }
358
+ /* We let the helper sign-extend SB and SW, but leave SL for here. */
359
+ if (is_64 && (memop & MO_SSIZE) == MO_SL) {
360
+ tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA);
361
} else {
362
- if ((memop & MO_SIZE) == MO_64) {
363
- tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX);
364
- tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL);
365
- tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR);
366
- } else if (is_64) {
367
- /* Re-extend from 32-bit rather than reassembling when we
368
- know the high register must be an extension. */
369
- tcg_out_arithi(s, data, TCG_REG_O1, 0,
370
- memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL);
371
- } else {
372
- tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1);
373
- }
374
+ tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
375
}
376
377
*label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
378
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
379
unsigned s_bits = memop & MO_SIZE;
380
unsigned t_bits;
381
382
- if (SPARC64 && TARGET_LONG_BITS == 32) {
383
+ if (TARGET_LONG_BITS == 32) {
384
tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
385
addr = TCG_REG_T1;
386
}
387
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
388
* operation in the delay slot, and failure need only invoke the
389
* handler for SIGBUS.
390
*/
391
- TCGReg arg_low = TCG_REG_O1 + (!SPARC64 && TARGET_LONG_BITS == 64);
392
tcg_out_call_nodelay(s, qemu_unalign_ld_trampoline, false);
393
/* delay slot -- move to low part of argument reg */
394
- tcg_out_mov_delay(s, arg_low, addr);
395
+ tcg_out_mov_delay(s, TCG_REG_O1, addr);
396
} else {
397
/* Underalignment: load by pieces of minimum alignment. */
398
int ld_opc, a_size, s_size, i;
399
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
400
401
#ifdef CONFIG_SOFTMMU
402
unsigned memi = get_mmuidx(oi);
403
- TCGReg addrz, param;
404
+ TCGReg addrz;
405
const tcg_insn_unit *func;
406
407
addrz = tcg_out_tlb_load(s, addr, memi, memop,
408
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
409
410
/* TLB Miss. */
411
412
- param = TCG_REG_O1;
413
- if (!SPARC64 && TARGET_LONG_BITS == 64) {
414
- /* Skip the high-part; we'll perform the extract in the trampoline. */
415
- param++;
416
- }
417
- tcg_out_mov(s, TCG_TYPE_REG, param++, addrz);
418
- if (!SPARC64 && (memop & MO_SIZE) == MO_64) {
419
- /* Skip the high-part; we'll perform the extract in the trampoline. */
420
- param++;
421
- }
422
- tcg_out_mov(s, TCG_TYPE_REG, param++, data);
423
+ tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz);
424
+ tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O2, data);
425
426
func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)];
427
tcg_debug_assert(func != NULL);
428
tcg_out_call_nodelay(s, func, false);
429
/* delay slot */
430
- tcg_out_movi(s, TCG_TYPE_I32, param, oi);
431
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O3, oi);
432
433
*label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
434
#else
435
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
436
unsigned s_bits = memop & MO_SIZE;
437
unsigned t_bits;
438
439
- if (SPARC64 && TARGET_LONG_BITS == 32) {
440
+ if (TARGET_LONG_BITS == 32) {
441
tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
442
addr = TCG_REG_T1;
443
}
444
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
445
* operation in the delay slot, and failure need only invoke the
446
* handler for SIGBUS.
447
*/
448
- TCGReg arg_low = TCG_REG_O1 + (!SPARC64 && TARGET_LONG_BITS == 64);
449
tcg_out_call_nodelay(s, qemu_unalign_st_trampoline, false);
450
/* delay slot -- move to low part of argument reg */
451
- tcg_out_mov_delay(s, arg_low, addr);
452
+ tcg_out_mov_delay(s, TCG_REG_O1, addr);
453
} else {
454
/* Underalignment: store by pieces of minimum alignment. */
455
int st_opc, a_size, s_size, i;
456
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
457
case INDEX_op_muls2_i32:
458
c = ARITH_SMUL;
459
do_mul2:
460
- /* The 32-bit multiply insns produce a full 64-bit result. If the
461
- destination register can hold it, we can avoid the slower RDY. */
462
+ /* The 32-bit multiply insns produce a full 64-bit result. */
463
tcg_out_arithc(s, a0, a2, args[3], const_args[3], c);
464
- if (SPARC64 || a0 <= TCG_REG_O7) {
465
- tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX);
466
- } else {
467
- tcg_out_rdy(s, a1);
468
- }
469
+ tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX);
470
break;
471
472
case INDEX_op_qemu_ld_i32:
473
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
474
tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
475
}
476
477
-#if SPARC64
478
-# define ELF_HOST_MACHINE EM_SPARCV9
479
-#else
480
-# define ELF_HOST_MACHINE EM_SPARC32PLUS
481
-# define ELF_HOST_FLAGS EF_SPARC_32PLUS
482
-#endif
483
+#define ELF_HOST_MACHINE EM_SPARCV9
484
485
typedef struct {
486
DebugFrameHeader h;
487
- uint8_t fde_def_cfa[SPARC64 ? 4 : 2];
488
+ uint8_t fde_def_cfa[4];
489
uint8_t fde_win_save;
490
uint8_t fde_ret_save[3];
491
} DebugFrame;
492
@@ -XXX,XX +XXX,XX @@ static const DebugFrame debug_frame = {
493
.h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
494
495
.fde_def_cfa = {
496
-#if SPARC64
497
12, 30, /* DW_CFA_def_cfa i6, 2047 */
498
(2047 & 0x7f) | 0x80, (2047 >> 7)
499
-#else
500
- 13, 30 /* DW_CFA_def_cfa_register i6 */
501
-#endif
502
},
503
.fde_win_save = 0x2d, /* DW_CFA_GNU_window_save */
504
.fde_ret_save = { 9, 15, 31 }, /* DW_CFA_register o7, i7 */
505
--
85
--
506
2.34.1
86
2.34.1
507
87
508
88
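A standalone sketch of the approach taken by the new x86_64 atomic128-ldst.h header (illustrative only: the function and variable names below are invented, the detection would come from cpuinfo/CPUID as CPUINFO_ATOMIC_VMOVDQA, pointers must be 16-byte aligned, and the fallback needs -mcx16 to build):

typedef __int128 i128;

/* True when aligned VMOVDQA is documented as a single atomic access
 * by the CPU vendor; QEMU learns this at startup via cpuinfo. */
static _Bool have_atomic_vmovdqa;

static inline i128 atomic16_load(i128 *ptr)
{
    i128 r;
    if (have_atomic_vmovdqa) {
        /* One 16-byte vector load; truly read-only. */
        asm("vmovdqa %1, %0" : "=x"(r) : "m"(*ptr));
    } else {
        /* cmpxchg16b with old == new acts as a load, but it writes the
         * cache line, which is why the header prefers VMOVDQA and why
         * clang's __atomic_load_n expansion is called out as read-write. */
        r = __sync_val_compare_and_swap(ptr, 0, 0);
    }
    return r;
}

static inline void atomic16_store(i128 *ptr, i128 val)
{
    if (have_atomic_vmovdqa) {
        asm("vmovdqa %1, %0" : "=m"(*ptr) : "x"(val));
    } else {
        i128 old;
        do {
            old = *ptr;
        } while (!__sync_bool_compare_and_swap(ptr, old, val));
    }
}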
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/i386/tcg-target.h | 4 +-
5
tcg/i386/tcg-target.c.inc | 191 +++++++++++++++++++++++++++++++++++++-
6
2 files changed, 190 insertions(+), 5 deletions(-)
1
7
8
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/i386/tcg-target.h
11
+++ b/tcg/i386/tcg-target.h
12
@@ -XXX,XX +XXX,XX @@ typedef enum {
13
#define have_avx1 (cpuinfo & CPUINFO_AVX1)
14
#define have_avx2 (cpuinfo & CPUINFO_AVX2)
15
#define have_movbe (cpuinfo & CPUINFO_MOVBE)
16
-#define have_atomic16 (cpuinfo & CPUINFO_ATOMIC_VMOVDQA)
17
18
/*
19
* There are interesting instructions in AVX512, so long as we have AVX512VL,
20
@@ -XXX,XX +XXX,XX @@ typedef enum {
21
#define TCG_TARGET_HAS_qemu_st8_i32 1
22
#endif
23
24
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
25
+#define TCG_TARGET_HAS_qemu_ldst_i128 \
26
+ (TCG_TARGET_REG_BITS == 64 && (cpuinfo & CPUINFO_ATOMIC_VMOVDQA))
27
28
/* We do not support older SSE systems, only beginning with AVX1. */
29
#define TCG_TARGET_HAS_v64 have_avx1
30
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
31
index XXXXXXX..XXXXXXX 100644
32
--- a/tcg/i386/tcg-target.c.inc
33
+++ b/tcg/i386/tcg-target.c.inc
34
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
35
#endif
36
};
37
38
+#define TCG_TMP_VEC TCG_REG_XMM5
39
+
40
static const int tcg_target_call_iarg_regs[] = {
41
#if TCG_TARGET_REG_BITS == 64
42
#if defined(_WIN64)
43
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
44
#define OPC_PCMPGTW (0x65 | P_EXT | P_DATA16)
45
#define OPC_PCMPGTD (0x66 | P_EXT | P_DATA16)
46
#define OPC_PCMPGTQ (0x37 | P_EXT38 | P_DATA16)
47
+#define OPC_PEXTRD (0x16 | P_EXT3A | P_DATA16)
48
+#define OPC_PINSRD (0x22 | P_EXT3A | P_DATA16)
49
#define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16)
50
#define OPC_PMAXSW (0xee | P_EXT | P_DATA16)
51
#define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16)
52
@@ -XXX,XX +XXX,XX @@ typedef struct {
53
54
bool tcg_target_has_memory_bswap(MemOp memop)
55
{
56
- return have_movbe;
57
+ TCGAtomAlign aa;
58
+
59
+ if (!have_movbe) {
60
+ return false;
61
+ }
62
+ if ((memop & MO_SIZE) < MO_128) {
63
+ return true;
64
+ }
65
+
66
+ /*
67
+ * Reject 16-byte memop with 16-byte atomicity, i.e. VMOVDQA,
68
+ * but do allow a pair of 64-bit operations, i.e. MOVBEQ.
69
+ */
70
+ aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
71
+ return aa.atom < MO_128;
72
}
73
74
/*
75
@@ -XXX,XX +XXX,XX @@ static const TCGLdstHelperParam ldst_helper_param = {
76
static const TCGLdstHelperParam ldst_helper_param = { };
77
#endif
78
79
+static void tcg_out_vec_to_pair(TCGContext *s, TCGType type,
80
+ TCGReg l, TCGReg h, TCGReg v)
81
+{
82
+ int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
83
+
84
+ /* vpmov{d,q} %v, %l */
85
+ tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, v, 0, l);
86
+ /* vpextr{d,q} $1, %v, %h */
87
+ tcg_out_vex_modrm(s, OPC_PEXTRD + rexw, v, 0, h);
88
+ tcg_out8(s, 1);
89
+}
90
+
91
+static void tcg_out_pair_to_vec(TCGContext *s, TCGType type,
92
+ TCGReg v, TCGReg l, TCGReg h)
93
+{
94
+ int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
95
+
96
+ /* vmov{d,q} %l, %v */
97
+ tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, v, 0, l);
98
+ /* vpinsr{d,q} $1, %h, %v, %v */
99
+ tcg_out_vex_modrm(s, OPC_PINSRD + rexw, v, v, h);
100
+ tcg_out8(s, 1);
101
+}
102
+
103
/*
104
* Generate code for the slow path for a load at the end of block
105
*/
106
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
107
{
108
TCGLabelQemuLdst *ldst = NULL;
109
MemOp opc = get_memop(oi);
110
+ MemOp s_bits = opc & MO_SIZE;
111
unsigned a_mask;
112
113
#ifdef CONFIG_SOFTMMU
114
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
115
*h = x86_guest_base;
116
#endif
117
h->base = addrlo;
118
- h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
119
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
120
a_mask = (1 << h->aa.align) - 1;
121
122
#ifdef CONFIG_SOFTMMU
123
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
124
TCGType tlbtype = TCG_TYPE_I32;
125
int trexw = 0, hrexw = 0, tlbrexw = 0;
126
unsigned mem_index = get_mmuidx(oi);
127
- unsigned s_bits = opc & MO_SIZE;
128
unsigned s_mask = (1 << s_bits) - 1;
129
int tlb_mask;
130
131
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
132
h.base, h.index, 0, h.ofs + 4);
133
}
134
break;
135
+
136
+ case MO_128:
137
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
138
+
139
+ /*
140
+ * Without 16-byte atomicity, use integer regs.
141
+ * That is where we want the data, and it allows bswaps.
142
+ */
143
+ if (h.aa.atom < MO_128) {
144
+ if (use_movbe) {
145
+ TCGReg t = datalo;
146
+ datalo = datahi;
147
+ datahi = t;
148
+ }
149
+ if (h.base == datalo || h.index == datalo) {
150
+ tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, datahi,
151
+ h.base, h.index, 0, h.ofs);
152
+ tcg_out_modrm_offset(s, movop + P_REXW + h.seg,
153
+ datalo, datahi, 0);
154
+ tcg_out_modrm_offset(s, movop + P_REXW + h.seg,
155
+ datahi, datahi, 8);
156
+ } else {
157
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
158
+ h.base, h.index, 0, h.ofs);
159
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi,
160
+ h.base, h.index, 0, h.ofs + 8);
161
+ }
162
+ break;
163
+ }
164
+
165
+ /*
166
+ * With 16-byte atomicity, a vector load is required.
167
+ * If we already have 16-byte alignment, then VMOVDQA always works.
168
+ * Else if VMOVDQU has atomicity with dynamic alignment, use that.
169
+ * Else use we require a runtime test for alignment for VMOVDQA;
170
+ * use VMOVDQU on the unaligned nonatomic path for simplicity.
171
+ */
172
+ if (h.aa.align >= MO_128) {
173
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg,
174
+ TCG_TMP_VEC, 0,
175
+ h.base, h.index, 0, h.ofs);
176
+ } else if (cpuinfo & CPUINFO_ATOMIC_VMOVDQU) {
177
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_VxWx + h.seg,
178
+ TCG_TMP_VEC, 0,
179
+ h.base, h.index, 0, h.ofs);
180
+ } else {
181
+ TCGLabel *l1 = gen_new_label();
182
+ TCGLabel *l2 = gen_new_label();
183
+
184
+ tcg_out_testi(s, h.base, 15);
185
+ tcg_out_jxx(s, JCC_JNE, l1, true);
186
+
187
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg,
188
+ TCG_TMP_VEC, 0,
189
+ h.base, h.index, 0, h.ofs);
190
+ tcg_out_jxx(s, JCC_JMP, l2, true);
191
+
192
+ tcg_out_label(s, l1);
193
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_VxWx + h.seg,
194
+ TCG_TMP_VEC, 0,
195
+ h.base, h.index, 0, h.ofs);
196
+ tcg_out_label(s, l2);
197
+ }
198
+ tcg_out_vec_to_pair(s, TCG_TYPE_I64, datalo, datahi, TCG_TMP_VEC);
199
+ break;
200
+
201
default:
202
g_assert_not_reached();
203
}
204
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
205
h.base, h.index, 0, h.ofs + 4);
206
}
207
break;
208
+
209
+ case MO_128:
210
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
211
+
212
+ /*
213
+ * Without 16-byte atomicity, use integer regs.
214
+ * That is where we have the data, and it allows bswaps.
215
+ */
216
+ if (h.aa.atom < MO_128) {
217
+ if (use_movbe) {
218
+ TCGReg t = datalo;
219
+ datalo = datahi;
220
+ datahi = t;
221
+ }
222
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
223
+ h.base, h.index, 0, h.ofs);
224
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi,
225
+ h.base, h.index, 0, h.ofs + 8);
226
+ break;
227
+ }
228
+
229
+ /*
230
+ * With 16-byte atomicity, a vector store is required.
231
+ * If we already have 16-byte alignment, then VMOVDQA always works.
232
+ * Else if VMOVDQU has atomicity with dynamic alignment, use that.
233
+ * Else use we require a runtime test for alignment for VMOVDQA;
234
+ * use VMOVDQU on the unaligned nonatomic path for simplicity.
235
+ */
236
+ tcg_out_pair_to_vec(s, TCG_TYPE_I64, TCG_TMP_VEC, datalo, datahi);
237
+ if (h.aa.align >= MO_128) {
238
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg,
239
+ TCG_TMP_VEC, 0,
240
+ h.base, h.index, 0, h.ofs);
241
+ } else if (cpuinfo & CPUINFO_ATOMIC_VMOVDQU) {
242
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_WxVx + h.seg,
243
+ TCG_TMP_VEC, 0,
244
+ h.base, h.index, 0, h.ofs);
245
+ } else {
246
+ TCGLabel *l1 = gen_new_label();
247
+ TCGLabel *l2 = gen_new_label();
248
+
249
+ tcg_out_testi(s, h.base, 15);
250
+ tcg_out_jxx(s, JCC_JNE, l1, true);
251
+
252
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg,
253
+ TCG_TMP_VEC, 0,
254
+ h.base, h.index, 0, h.ofs);
255
+ tcg_out_jxx(s, JCC_JMP, l2, true);
256
+
257
+ tcg_out_label(s, l1);
258
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQU_WxVx + h.seg,
259
+ TCG_TMP_VEC, 0,
260
+ h.base, h.index, 0, h.ofs);
261
+ tcg_out_label(s, l2);
262
+ }
263
+ break;
264
+
265
default:
266
g_assert_not_reached();
267
}
268
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
269
tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
270
}
271
break;
272
+ case INDEX_op_qemu_ld_a32_i128:
273
+ case INDEX_op_qemu_ld_a64_i128:
274
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
275
+ tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
276
+ break;
277
278
case INDEX_op_qemu_st_a64_i32:
279
case INDEX_op_qemu_st8_a64_i32:
280
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
281
tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
282
}
283
break;
284
+ case INDEX_op_qemu_st_a32_i128:
285
+ case INDEX_op_qemu_st_a64_i128:
286
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
287
+ tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
288
+ break;
289
290
OP_32_64(mulu2):
291
tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
292
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
293
case INDEX_op_qemu_st_a64_i64:
294
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I4(L, L, L, L);
295
296
+ case INDEX_op_qemu_ld_a32_i128:
297
+ case INDEX_op_qemu_ld_a64_i128:
298
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
299
+ return C_O2_I1(r, r, L);
300
+ case INDEX_op_qemu_st_a32_i128:
301
+ case INDEX_op_qemu_st_a64_i128:
302
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
303
+ return C_O0_I3(L, L, L);
304
+
305
case INDEX_op_brcond2_i32:
306
return C_O0_I4(r, r, ri, ri);
307
308
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
309
310
s->reserved_regs = 0;
311
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
312
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMP_VEC);
313
#ifdef _WIN64
314
/* These are call saved, and we don't save them, so don't use them. */
315
tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM6);
316
--
317
2.34.1
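The interesting codegen details in the i386 patch above are the vector/GPR-pair shuffles and the aligned-versus-unaligned dispatch. A hedged intrinsics rendering of what the emitted code does (QEMU emits these instructions directly from tcg_out_vec_to_pair()/tcg_out_pair_to_vec(); the helpers below are illustrative sketches and require SSE4.1):

#include <stdint.h>
#include <immintrin.h>

/* Counterpart of tcg_out_vec_to_pair(): vmovq + vpextrq $1. */
static inline void vec_to_pair(__m128i v, uint64_t *lo, uint64_t *hi)
{
    *lo = (uint64_t)_mm_cvtsi128_si64(v);
    *hi = (uint64_t)_mm_extract_epi64(v, 1);
}

/* Counterpart of tcg_out_pair_to_vec(): vmovq + vpinsrq $1. */
static inline __m128i pair_to_vec(uint64_t lo, uint64_t hi)
{
    __m128i v = _mm_cvtsi64_si128((int64_t)lo);
    return _mm_insert_epi64(v, (int64_t)hi, 1);
}

/* The emitted 16-byte load tests alignment at runtime when VMOVDQU is
 * not known to be atomic: aligned addresses take the atomic VMOVDQA
 * path, unaligned ones the plain VMOVDQU path. */
static inline __m128i guest_ld16(const void *p)
{
    return ((uintptr_t)p & 15) ? _mm_loadu_si128((const __m128i *)p)
                               : _mm_load_si128((const __m128i *)p);
}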
1
The value passed is always true, and if the target's
1
We will need to allocate a second general-purpose temporary.
2
synchronize_from_tb hook is non-trivial, not exiting
2
Rename the existing temps to add a distinguishing number.
3
may be erroneous.
4
3
5
Reviewed-by: Claudio Fontana <cfontana@suse.de>
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
include/exec/exec-all.h | 5 +----
7
tcg/aarch64/tcg-target.c.inc | 50 ++++++++++++++++++------------------
9
accel/tcg/cpu-exec-common.c | 2 +-
8
1 file changed, 25 insertions(+), 25 deletions(-)
10
accel/tcg/translate-all.c | 12 ++----------
11
target/alpha/helper.c | 2 +-
12
target/alpha/mem_helper.c | 2 +-
13
target/arm/op_helper.c | 2 +-
14
target/arm/tlb_helper.c | 8 ++++----
15
target/cris/helper.c | 2 +-
16
target/i386/tcg/sysemu/svm_helper.c | 2 +-
17
target/m68k/op_helper.c | 4 ++--
18
target/microblaze/helper.c | 2 +-
19
target/nios2/op_helper.c | 2 +-
20
target/openrisc/sys_helper.c | 4 ++--
21
target/ppc/excp_helper.c | 2 +-
22
target/s390x/tcg/excp_helper.c | 2 +-
23
target/tricore/op_helper.c | 2 +-
24
target/xtensa/helper.c | 6 +++---
25
17 files changed, 25 insertions(+), 36 deletions(-)
26
9
27
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
10
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
28
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
29
--- a/include/exec/exec-all.h
12
--- a/tcg/aarch64/tcg-target.c.inc
30
+++ b/include/exec/exec-all.h
13
+++ b/tcg/aarch64/tcg-target.c.inc
31
@@ -XXX,XX +XXX,XX @@ bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data);
14
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
32
* cpu_restore_state:
15
return TCG_REG_X0 + slot;
33
* @cpu: the cpu context
16
}
34
* @host_pc: the host pc within the translation
17
35
- * @will_exit: true if the TB executed will be interrupted after some
18
-#define TCG_REG_TMP TCG_REG_X30
36
- cpu adjustments. Required for maintaining the correct
19
-#define TCG_VEC_TMP TCG_REG_V31
37
- icount valus
20
+#define TCG_REG_TMP0 TCG_REG_X30
38
* @return: true if state was restored, false otherwise
21
+#define TCG_VEC_TMP0 TCG_REG_V31
39
*
22
40
* Attempt to restore the state for a fault occurring in translated
23
#ifndef CONFIG_SOFTMMU
41
* code. If @host_pc is not in translated code no state is
24
#define TCG_REG_GUEST_BASE TCG_REG_X28
42
* restored and the function returns false.
25
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
43
*/
26
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
44
-bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit);
27
TCGReg r, TCGReg base, intptr_t offset)
45
+bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc);
46
47
G_NORETURN void cpu_loop_exit_noexc(CPUState *cpu);
48
G_NORETURN void cpu_loop_exit(CPUState *cpu);
49
diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/accel/tcg/cpu-exec-common.c
52
+++ b/accel/tcg/cpu-exec-common.c
53
@@ -XXX,XX +XXX,XX @@ void cpu_loop_exit(CPUState *cpu)
54
void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
55
{
28
{
56
if (pc) {
29
- TCGReg temp = TCG_REG_TMP;
57
- cpu_restore_state(cpu, pc, true);
30
+ TCGReg temp = TCG_REG_TMP0;
58
+ cpu_restore_state(cpu, pc);
31
32
if (offset < -0xffffff || offset > 0xffffff) {
33
tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
34
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
59
}
35
}
60
cpu_loop_exit(cpu);
36
37
/* Worst-case scenario, move offset to temp register, use reg offset. */
38
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
39
- tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
40
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
41
+ tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
61
}
42
}
62
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
43
63
index XXXXXXX..XXXXXXX 100644
44
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
64
--- a/accel/tcg/translate-all.c
45
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
65
+++ b/accel/tcg/translate-all.c
46
if (offset == sextract64(offset, 0, 26)) {
66
@@ -XXX,XX +XXX,XX @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
47
tcg_out_insn(s, 3206, BL, offset);
67
#endif
68
}
69
70
-bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
71
+bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
72
{
73
- /*
74
- * The pc update associated with restore without exit will
75
- * break the relative pc adjustments performed by TARGET_TB_PCREL.
76
- */
77
- if (TARGET_TB_PCREL) {
78
- assert(will_exit);
79
- }
80
-
81
/*
82
* The host_pc has to be in the rx region of the code buffer.
83
* If it is not we will not be able to resolve it here.
84
@@ -XXX,XX +XXX,XX @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
85
if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
86
TranslationBlock *tb = tcg_tb_lookup(host_pc);
87
if (tb) {
88
- cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit);
89
+ cpu_restore_state_from_tb(cpu, tb, host_pc, true);
90
return true;
91
}
92
}
93
diff --git a/target/alpha/helper.c b/target/alpha/helper.c
94
index XXXXXXX..XXXXXXX 100644
95
--- a/target/alpha/helper.c
96
+++ b/target/alpha/helper.c
97
@@ -XXX,XX +XXX,XX @@ G_NORETURN void dynamic_excp(CPUAlphaState *env, uintptr_t retaddr,
98
cs->exception_index = excp;
99
env->error_code = error;
100
if (retaddr) {
101
- cpu_restore_state(cs, retaddr, true);
102
+ cpu_restore_state(cs, retaddr);
103
/* Floating-point exceptions (our only users) point to the next PC. */
104
env->pc += 4;
105
}
106
diff --git a/target/alpha/mem_helper.c b/target/alpha/mem_helper.c
107
index XXXXXXX..XXXXXXX 100644
108
--- a/target/alpha/mem_helper.c
109
+++ b/target/alpha/mem_helper.c
110
@@ -XXX,XX +XXX,XX @@ static void do_unaligned_access(CPUAlphaState *env, vaddr addr, uintptr_t retadd
111
uint64_t pc;
112
uint32_t insn;
113
114
- cpu_restore_state(env_cpu(env), retaddr, true);
115
+ cpu_restore_state(env_cpu(env), retaddr);
116
117
pc = env->pc;
118
insn = cpu_ldl_code(env, pc);
119
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
120
index XXXXXXX..XXXXXXX 100644
121
--- a/target/arm/op_helper.c
122
+++ b/target/arm/op_helper.c
123
@@ -XXX,XX +XXX,XX @@ void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome,
124
* we must restore CPU state here before setting the syndrome
125
* the caller passed us, and cannot use cpu_loop_exit_restore().
126
*/
127
- cpu_restore_state(cs, ra, true);
128
+ cpu_restore_state(cs, ra);
129
raise_exception(env, excp, syndrome, target_el);
130
}
131
132
diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c
133
index XXXXXXX..XXXXXXX 100644
134
--- a/target/arm/tlb_helper.c
135
+++ b/target/arm/tlb_helper.c
136
@@ -XXX,XX +XXX,XX @@ void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
137
ARMMMUFaultInfo fi = {};
138
139
/* now we have a real cpu fault */
140
- cpu_restore_state(cs, retaddr, true);
141
+ cpu_restore_state(cs, retaddr);
142
143
fi.type = ARMFault_Alignment;
144
arm_deliver_fault(cpu, vaddr, access_type, mmu_idx, &fi);
145
@@ -XXX,XX +XXX,XX @@ void arm_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
146
ARMMMUFaultInfo fi = {};
147
148
/* now we have a real cpu fault */
149
- cpu_restore_state(cs, retaddr, true);
150
+ cpu_restore_state(cs, retaddr);
151
152
fi.ea = arm_extabort_type(response);
153
fi.type = ARMFault_SyncExternal;
154
@@ -XXX,XX +XXX,XX @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
155
return false;
156
} else {
48
} else {
157
/* now we have a real cpu fault */
49
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
158
- cpu_restore_state(cs, retaddr, true);
50
- tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
159
+ cpu_restore_state(cs, retaddr);
51
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
160
arm_deliver_fault(cpu, address, access_type, mmu_idx, fi);
52
+ tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
161
}
53
}
162
}
54
}
163
@@ -XXX,XX +XXX,XX @@ void arm_cpu_record_sigsegv(CPUState *cs, vaddr addr,
55
164
* We report both ESR and FAR to signal handlers.
56
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
165
* For now, it's easiest to deliver the fault normally.
57
AArch64Insn insn;
166
*/
58
167
- cpu_restore_state(cs, ra, true);
59
if (rl == ah || (!const_bh && rl == bh)) {
168
+ cpu_restore_state(cs, ra);
60
- rl = TCG_REG_TMP;
169
arm_deliver_fault(cpu, addr, access_type, MMU_USER_IDX, &fi);
61
+ rl = TCG_REG_TMP0;
170
}
62
}
171
63
172
diff --git a/target/cris/helper.c b/target/cris/helper.c
64
if (const_bl) {
173
index XXXXXXX..XXXXXXX 100644
65
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
174
--- a/target/cris/helper.c
66
possibility of adding 0+const in the low part, and the
175
+++ b/target/cris/helper.c
67
immediate add instructions encode XSP not XZR. Don't try
176
@@ -XXX,XX +XXX,XX @@ bool cris_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
68
anything more elaborate here than loading another zero. */
177
cs->exception_index = EXCP_BUSFAULT;
69
- al = TCG_REG_TMP;
178
env->fault_vector = res.bf_vec;
70
+ al = TCG_REG_TMP0;
179
if (retaddr) {
71
tcg_out_movi(s, ext, al, 0);
180
- if (cpu_restore_state(cs, retaddr, true)) {
181
+ if (cpu_restore_state(cs, retaddr)) {
182
/* Evaluate flags after retranslation. */
183
helper_top_evaluate_flags(env);
184
}
72
}
185
diff --git a/target/i386/tcg/sysemu/svm_helper.c b/target/i386/tcg/sysemu/svm_helper.c
73
tcg_out_insn_3401(s, insn, ext, rl, al, bl);
186
index XXXXXXX..XXXXXXX 100644
74
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
187
--- a/target/i386/tcg/sysemu/svm_helper.c
188
+++ b/target/i386/tcg/sysemu/svm_helper.c
189
@@ -XXX,XX +XXX,XX @@ void cpu_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1,
190
{
75
{
191
CPUState *cs = env_cpu(env);
76
TCGReg a1 = a0;
192
77
if (is_ctz) {
193
- cpu_restore_state(cs, retaddr, true);
78
- a1 = TCG_REG_TMP;
194
+ cpu_restore_state(cs, retaddr);
79
+ a1 = TCG_REG_TMP0;
195
80
tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
196
qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmexit(%08x, %016" PRIx64 ", %016"
81
}
197
PRIx64 ", " TARGET_FMT_lx ")!\n",
82
if (const_b && b == (ext ? 64 : 32)) {
198
diff --git a/target/m68k/op_helper.c b/target/m68k/op_helper.c
83
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
199
index XXXXXXX..XXXXXXX 100644
84
AArch64Insn sel = I3506_CSEL;
200
--- a/target/m68k/op_helper.c
85
201
+++ b/target/m68k/op_helper.c
86
tcg_out_cmp(s, ext, a0, 0, 1);
202
@@ -XXX,XX +XXX,XX @@ void m68k_cpu_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr,
87
- tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
203
M68kCPU *cpu = M68K_CPU(cs);
88
+ tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);
204
CPUM68KState *env = &cpu->env;
89
205
90
if (const_b) {
206
- cpu_restore_state(cs, retaddr, true);
91
if (b == -1) {
207
+ cpu_restore_state(cs, retaddr);
92
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
208
93
b = d;
209
if (m68k_feature(env, M68K_FEATURE_M68040)) {
94
}
210
env->mmu.mmusr = 0;
95
}
211
@@ -XXX,XX +XXX,XX @@ raise_exception_format2(CPUM68KState *env, int tt, int ilen, uintptr_t raddr)
96
- tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
212
cs->exception_index = tt;
97
+ tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
213
214
/* Recover PC and CC_OP for the beginning of the insn. */
215
- cpu_restore_state(cs, raddr, true);
216
+ cpu_restore_state(cs, raddr);
217
218
/* Flags are current in env->cc_*, or are undefined. */
219
env->cc_op = CC_OP_FLAGS;
220
diff --git a/target/microblaze/helper.c b/target/microblaze/helper.c
221
index XXXXXXX..XXXXXXX 100644
222
--- a/target/microblaze/helper.c
223
+++ b/target/microblaze/helper.c
224
@@ -XXX,XX +XXX,XX @@ void mb_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
225
uint32_t esr, iflags;
226
227
/* Recover the pc and iflags from the corresponding insn_start. */
228
- cpu_restore_state(cs, retaddr, true);
229
+ cpu_restore_state(cs, retaddr);
230
iflags = cpu->env.iflags;
231
232
qemu_log_mask(CPU_LOG_INT,
233
diff --git a/target/nios2/op_helper.c b/target/nios2/op_helper.c
234
index XXXXXXX..XXXXXXX 100644
235
--- a/target/nios2/op_helper.c
236
+++ b/target/nios2/op_helper.c
237
@@ -XXX,XX +XXX,XX @@ void nios2_cpu_loop_exit_advance(CPUNios2State *env, uintptr_t retaddr)
238
* Do this here, rather than in restore_state_to_opc(),
239
* lest we affect QEMU internal exceptions, like EXCP_DEBUG.
240
*/
241
- cpu_restore_state(cs, retaddr, true);
242
+ cpu_restore_state(cs, retaddr);
243
env->pc += 4;
244
cpu_loop_exit(cs);
245
}
246
diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c
247
index XXXXXXX..XXXXXXX 100644
248
--- a/target/openrisc/sys_helper.c
249
+++ b/target/openrisc/sys_helper.c
250
@@ -XXX,XX +XXX,XX @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb)
251
break;
252
253
case TO_SPR(0, 16): /* NPC */
254
- cpu_restore_state(cs, GETPC(), true);
255
+ cpu_restore_state(cs, GETPC());
256
/* ??? Mirror or1ksim in not trashing delayed branch state
257
when "jumping" to the current instruction. */
258
if (env->pc != rb) {
259
@@ -XXX,XX +XXX,XX @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb)
260
case TO_SPR(8, 0): /* PMR */
261
env->pmr = rb;
262
if (env->pmr & PMR_DME || env->pmr & PMR_SME) {
263
- cpu_restore_state(cs, GETPC(), true);
264
+ cpu_restore_state(cs, GETPC());
265
env->pc += 4;
266
cs->halted = 1;
267
raise_exception(cpu, EXCP_HALTED);
268
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
269
index XXXXXXX..XXXXXXX 100644
270
--- a/target/ppc/excp_helper.c
271
+++ b/target/ppc/excp_helper.c
272
@@ -XXX,XX +XXX,XX @@ void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
273
uint32_t insn;
274
275
/* Restore state and reload the insn we executed, for filling in DSISR. */
276
- cpu_restore_state(cs, retaddr, true);
277
+ cpu_restore_state(cs, retaddr);
278
insn = cpu_ldl_code(env, env->nip);
279
280
switch (env->mmu_model) {
281
diff --git a/target/s390x/tcg/excp_helper.c b/target/s390x/tcg/excp_helper.c
282
index XXXXXXX..XXXXXXX 100644
283
--- a/target/s390x/tcg/excp_helper.c
284
+++ b/target/s390x/tcg/excp_helper.c
285
@@ -XXX,XX +XXX,XX @@ G_NORETURN void tcg_s390_program_interrupt(CPUS390XState *env,
286
{
287
CPUState *cs = env_cpu(env);
288
289
- cpu_restore_state(cs, ra, true);
290
+ cpu_restore_state(cs, ra);
291
qemu_log_mask(CPU_LOG_INT, "program interrupt at %#" PRIx64 "\n",
292
env->psw.addr);
293
trigger_pgm_exception(env, code);
294
diff --git a/target/tricore/op_helper.c b/target/tricore/op_helper.c
295
index XXXXXXX..XXXXXXX 100644
296
--- a/target/tricore/op_helper.c
297
+++ b/target/tricore/op_helper.c
298
@@ -XXX,XX +XXX,XX @@ void raise_exception_sync_internal(CPUTriCoreState *env, uint32_t class, int tin
299
{
300
CPUState *cs = env_cpu(env);
301
/* in case we come from a helper-call we need to restore the PC */
302
- cpu_restore_state(cs, pc, true);
303
+ cpu_restore_state(cs, pc);
304
305
/* Tin is loaded into d[15] */
306
env->gpr_d[15] = tin;
307
diff --git a/target/xtensa/helper.c b/target/xtensa/helper.c
308
index XXXXXXX..XXXXXXX 100644
309
--- a/target/xtensa/helper.c
310
+++ b/target/xtensa/helper.c
311
@@ -XXX,XX +XXX,XX @@ void xtensa_cpu_do_unaligned_access(CPUState *cs,
312
313
assert(xtensa_option_enabled(env->config,
314
XTENSA_OPTION_UNALIGNED_EXCEPTION));
315
- cpu_restore_state(CPU(cpu), retaddr, true);
316
+ cpu_restore_state(CPU(cpu), retaddr);
317
HELPER(exception_cause_vaddr)(env,
318
env->pc, LOAD_STORE_ALIGNMENT_CAUSE,
319
addr);
320
@@ -XXX,XX +XXX,XX @@ bool xtensa_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
321
} else if (probe) {
322
return false;
323
} else {
324
- cpu_restore_state(cs, retaddr, true);
325
+ cpu_restore_state(cs, retaddr);
326
HELPER(exception_cause_vaddr)(env, env->pc, ret, address);
327
}
98
}
328
}
99
}
329
@@ -XXX,XX +XXX,XX @@ void xtensa_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr,
100
330
XtensaCPU *cpu = XTENSA_CPU(cs);
101
@@ -XXX,XX +XXX,XX @@ bool tcg_target_has_memory_bswap(MemOp memop)
331
CPUXtensaState *env = &cpu->env;
102
}
332
103
333
- cpu_restore_state(cs, retaddr, true);
104
static const TCGLdstHelperParam ldst_helper_param = {
334
+ cpu_restore_state(cs, retaddr);
105
- .ntmp = 1, .tmp = { TCG_REG_TMP }
335
HELPER(exception_cause_vaddr)(env, env->pc,
106
+ .ntmp = 1, .tmp = { TCG_REG_TMP0 }
336
access_type == MMU_INST_FETCH ?
107
};
337
INSTR_PIF_ADDR_ERROR_CAUSE :
108
109
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
110
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_tb(TCGContext *s, int which)
111
112
set_jmp_insn_offset(s, which);
113
tcg_out32(s, I3206_B);
114
- tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
115
+ tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
116
set_jmp_reset_offset(s, which);
117
}
118
119
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
120
ptrdiff_t i_offset = i_addr - jmp_rx;
121
122
/* Note that we asserted this in range in tcg_out_goto_tb. */
123
- insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
124
+ insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
125
}
126
qatomic_set((uint32_t *)jmp_rw, insn);
127
flush_idcache_range(jmp_rx, jmp_rw, 4);
128
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
129
130
case INDEX_op_rem_i64:
131
case INDEX_op_rem_i32:
132
- tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
133
- tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
134
+ tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
135
+ tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
136
break;
137
case INDEX_op_remu_i64:
138
case INDEX_op_remu_i32:
139
- tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
140
- tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
141
+ tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
142
+ tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
143
break;
144
145
case INDEX_op_shl_i64:
146
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
147
if (c2) {
148
tcg_out_rotl(s, ext, a0, a1, a2);
149
} else {
150
- tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
151
- tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
152
+ tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
153
+ tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
154
}
155
break;
156
157
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
158
break;
159
}
160
}
161
- tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
162
- a2 = TCG_VEC_TMP;
163
+ tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
164
+ a2 = TCG_VEC_TMP0;
165
}
166
if (is_scalar) {
167
insn = cmp_scalar_insn[cond];
168
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
169
s->reserved_regs = 0;
170
tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
171
tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
172
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
173
tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
174
- tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
175
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
176
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
177
}
178
179
/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
338
--
180
--
339
2.34.1
181
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/aarch64/tcg-target.c.inc | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {

TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
- TCG_REG_X16, TCG_REG_X17,

TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

+ /* X16 reserved as temporary */
+ /* X17 reserved as temporary */
/* X18 reserved by system */
/* X19 reserved for AREG0 */
/* X29 reserved as fp */
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
return TCG_REG_X0 + slot;
}

-#define TCG_REG_TMP0 TCG_REG_X30
+#define TCG_REG_TMP0 TCG_REG_X16
+#define TCG_REG_TMP1 TCG_REG_X17
+#define TCG_REG_TMP2 TCG_REG_X30
#define TCG_VEC_TMP0 TCG_REG_V31

#ifndef CONFIG_SOFTMMU
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
}

/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
--
1
With sparc64 we need not distinguish between registers that
can hold 32-bit values and those that can hold 64-bit values.

Adjust the softmmu tlb to use TMP[0-2], not any of the normally available
registers. Since we handle overlap between inputs and helper arguments,
we can allow any allocatable reg.
3
4
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
---
7
tcg/sparc64/tcg-target-con-set.h | 16 +----
8
tcg/aarch64/tcg-target-con-set.h | 2 --
8
tcg/sparc64/tcg-target-con-str.h | 3 -
9
tcg/aarch64/tcg-target-con-str.h | 1 -
9
tcg/sparc64/tcg-target.c.inc | 109 ++++++++++++-------------------
10
tcg/aarch64/tcg-target.c.inc | 45 ++++++++++++++------------------
10
3 files changed, 44 insertions(+), 84 deletions(-)
11
3 files changed, 19 insertions(+), 29 deletions(-)
11
12
12
diff --git a/tcg/sparc64/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h
13
diff --git a/tcg/aarch64/tcg-target-con-set.h b/tcg/aarch64/tcg-target-con-set.h
13
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/sparc64/tcg-target-con-set.h
15
--- a/tcg/aarch64/tcg-target-con-set.h
15
+++ b/tcg/sparc64/tcg-target-con-set.h
16
+++ b/tcg/aarch64/tcg-target-con-set.h
16
@@ -XXX,XX +XXX,XX @@
17
@@ -XXX,XX +XXX,XX @@
18
* tcg-target-con-str.h; the constraint combination is inclusive or.
17
*/
19
*/
18
C_O0_I1(r)
20
C_O0_I1(r)
21
-C_O0_I2(lZ, l)
22
C_O0_I2(r, rA)
19
C_O0_I2(rZ, r)
23
C_O0_I2(rZ, r)
20
-C_O0_I2(RZ, r)
24
C_O0_I2(w, r)
21
C_O0_I2(rZ, rJ)
25
-C_O1_I1(r, l)
22
-C_O0_I2(RZ, RJ)
23
-C_O0_I2(sZ, A)
24
-C_O0_I2(SZ, A)
25
-C_O1_I1(r, A)
26
-C_O1_I1(R, A)
27
+C_O0_I2(sZ, s)
28
+C_O1_I1(r, s)
29
C_O1_I1(r, r)
26
C_O1_I1(r, r)
30
-C_O1_I1(r, R)
27
C_O1_I1(w, r)
31
-C_O1_I1(R, r)
28
C_O1_I1(w, w)
32
-C_O1_I1(R, R)
29
diff --git a/tcg/aarch64/tcg-target-con-str.h b/tcg/aarch64/tcg-target-con-str.h
33
-C_O1_I2(R, R, R)
34
+C_O1_I2(r, r, r)
35
C_O1_I2(r, rZ, rJ)
36
-C_O1_I2(R, RZ, RJ)
37
C_O1_I4(r, rZ, rJ, rI, 0)
38
-C_O1_I4(R, RZ, RJ, RI, 0)
39
C_O2_I2(r, r, rZ, rJ)
40
-C_O2_I4(R, R, RZ, RZ, RJ, RI)
41
C_O2_I4(r, r, rZ, rZ, rJ, rJ)
42
diff --git a/tcg/sparc64/tcg-target-con-str.h b/tcg/sparc64/tcg-target-con-str.h
43
index XXXXXXX..XXXXXXX 100644
30
index XXXXXXX..XXXXXXX 100644
44
--- a/tcg/sparc64/tcg-target-con-str.h
31
--- a/tcg/aarch64/tcg-target-con-str.h
45
+++ b/tcg/sparc64/tcg-target-con-str.h
32
+++ b/tcg/aarch64/tcg-target-con-str.h
46
@@ -XXX,XX +XXX,XX @@
33
@@ -XXX,XX +XXX,XX @@
47
* REGS(letter, register_mask)
34
* REGS(letter, register_mask)
48
*/
35
*/
49
REGS('r', ALL_GENERAL_REGS)
36
REGS('r', ALL_GENERAL_REGS)
50
-REGS('R', ALL_GENERAL_REGS64)
37
-REGS('l', ALL_QLDST_REGS)
51
REGS('s', ALL_QLDST_REGS)
38
REGS('w', ALL_VECTOR_REGS)
52
-REGS('S', ALL_QLDST_REGS64)
53
-REGS('A', TARGET_LONG_BITS == 64 ? ALL_QLDST_REGS64 : ALL_QLDST_REGS)
54
39
55
/*
40
/*
56
* Define constraint letters for constants:
41
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
57
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
58
index XXXXXXX..XXXXXXX 100644
42
index XXXXXXX..XXXXXXX 100644
59
--- a/tcg/sparc64/tcg-target.c.inc
43
--- a/tcg/aarch64/tcg-target.c.inc
60
+++ b/tcg/sparc64/tcg-target.c.inc
44
+++ b/tcg/aarch64/tcg-target.c.inc
61
@@ -XXX,XX +XXX,XX @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
45
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
46
#define ALL_GENERAL_REGS 0xffffffffu
47
#define ALL_VECTOR_REGS 0xffffffff00000000ull
48
49
-#ifdef CONFIG_SOFTMMU
50
-#define ALL_QLDST_REGS \
51
- (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
52
- (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
53
-#else
54
-#define ALL_QLDST_REGS ALL_GENERAL_REGS
55
-#endif
56
-
57
/* Match a constant valid for addition (12-bit, optionally shifted). */
58
static inline bool is_aimm(uint64_t val)
59
{
60
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
61
unsigned s_bits = opc & MO_SIZE;
62
unsigned s_mask = (1u << s_bits) - 1;
63
unsigned mem_index = get_mmuidx(oi);
64
- TCGReg x3;
65
+ TCGReg addr_adj;
66
TCGType mask_type;
67
uint64_t compare_mask;
68
69
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
70
mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
71
? TCG_TYPE_I64 : TCG_TYPE_I32);
72
73
- /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
74
+ /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
75
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
76
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
77
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
78
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
79
- tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
80
+ tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
81
TLB_MASK_TABLE_OFS(mem_index), 1, 0);
82
83
/* Extract the TLB index from the address into X0. */
84
tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
85
- TCG_REG_X0, TCG_REG_X0, addr_reg,
86
+ TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
87
s->page_bits - CPU_TLB_ENTRY_BITS);
88
89
- /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
90
- tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
91
+ /* Add the tlb_table pointer, forming the CPUTLBEntry address in TMP1. */
92
+ tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
93
94
- /* Load the tlb comparator into X0, and the fast path addend into X1. */
95
- tcg_out_ld(s, addr_type, TCG_REG_X0, TCG_REG_X1,
96
+ /* Load the tlb comparator into TMP0, and the fast path addend into TMP1. */
97
+ tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
98
is_ld ? offsetof(CPUTLBEntry, addr_read)
99
: offsetof(CPUTLBEntry, addr_write));
100
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
101
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
102
offsetof(CPUTLBEntry, addend));
103
104
/*
105
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
106
* cross pages using the address of the last byte of the access.
107
*/
108
if (a_mask >= s_mask) {
109
- x3 = addr_reg;
110
+ addr_adj = addr_reg;
111
} else {
112
+ addr_adj = TCG_REG_TMP2;
113
tcg_out_insn(s, 3401, ADDI, addr_type,
114
- TCG_REG_X3, addr_reg, s_mask - a_mask);
115
- x3 = TCG_REG_X3;
116
+ addr_adj, addr_reg, s_mask - a_mask);
117
}
118
compare_mask = (uint64_t)s->page_mask | a_mask;
119
120
- /* Store the page mask part of the address into X3. */
121
- tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask);
122
+ /* Store the page mask part of the address into TMP2. */
123
+ tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
124
+ addr_adj, compare_mask);
125
126
/* Perform the address comparison. */
127
- tcg_out_cmp(s, addr_type, TCG_REG_X0, TCG_REG_X3, 0);
128
+ tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);
129
130
/* If not equal, we jump to the slow path. */
131
ldst->label_ptr[0] = s->code_ptr;
132
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
133
134
- h->base = TCG_REG_X1,
135
+ h->base = TCG_REG_TMP1;
136
h->index = addr_reg;
137
h->index_ext = addr_type;
62
#else
138
#else
63
#define SOFTMMU_RESERVE_REGS 0
64
#endif
65
-
66
-/*
67
- * Note that sparcv8plus can only hold 64 bit quantities in %g and %o
68
- * registers. These are saved manually by the kernel in full 64-bit
69
- * slots. The %i and %l registers are saved by the register window
70
- * mechanism, which only allocates space for 32 bits. Given that this
71
- * window spill/fill can happen on any signal, we must consider the
72
- * high bits of the %i and %l registers garbage at all times.
73
- */
74
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
75
-# define ALL_GENERAL_REGS64 ALL_GENERAL_REGS
76
#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
77
-#define ALL_QLDST_REGS64 (ALL_GENERAL_REGS64 & ~SOFTMMU_RESERVE_REGS)
78
79
/* Define some temporary registers. T2 is used for constant generation. */
80
#define TCG_REG_T1 TCG_REG_G1
81
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
139
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
82
return C_O0_I1(r);
140
case INDEX_op_qemu_ld_a64_i32:
83
141
case INDEX_op_qemu_ld_a32_i64:
84
case INDEX_op_ld8u_i32:
142
case INDEX_op_qemu_ld_a64_i64:
85
+ case INDEX_op_ld8u_i64:
143
- return C_O1_I1(r, l);
86
case INDEX_op_ld8s_i32:
144
+ return C_O1_I1(r, r);
87
+ case INDEX_op_ld8s_i64:
145
case INDEX_op_qemu_st_a32_i32:
88
case INDEX_op_ld16u_i32:
146
case INDEX_op_qemu_st_a64_i32:
89
+ case INDEX_op_ld16u_i64:
147
case INDEX_op_qemu_st_a32_i64:
90
case INDEX_op_ld16s_i32:
148
case INDEX_op_qemu_st_a64_i64:
91
+ case INDEX_op_ld16s_i64:
149
- return C_O0_I2(lZ, l);
92
case INDEX_op_ld_i32:
150
+ return C_O0_I2(rZ, r);
93
+ case INDEX_op_ld32u_i64:
151
94
+ case INDEX_op_ld32s_i64:
152
case INDEX_op_deposit_i32:
95
+ case INDEX_op_ld_i64:
153
case INDEX_op_deposit_i64:
96
case INDEX_op_neg_i32:
97
+ case INDEX_op_neg_i64:
98
case INDEX_op_not_i32:
99
+ case INDEX_op_not_i64:
100
+ case INDEX_op_ext32s_i64:
101
+ case INDEX_op_ext32u_i64:
102
+ case INDEX_op_ext_i32_i64:
103
+ case INDEX_op_extu_i32_i64:
104
+ case INDEX_op_extrl_i64_i32:
105
+ case INDEX_op_extrh_i64_i32:
106
return C_O1_I1(r, r);
107
108
case INDEX_op_st8_i32:
109
+ case INDEX_op_st8_i64:
110
case INDEX_op_st16_i32:
111
+ case INDEX_op_st16_i64:
112
case INDEX_op_st_i32:
113
+ case INDEX_op_st32_i64:
114
+ case INDEX_op_st_i64:
115
return C_O0_I2(rZ, r);
116
117
case INDEX_op_add_i32:
118
+ case INDEX_op_add_i64:
119
case INDEX_op_mul_i32:
120
+ case INDEX_op_mul_i64:
121
case INDEX_op_div_i32:
122
+ case INDEX_op_div_i64:
123
case INDEX_op_divu_i32:
124
+ case INDEX_op_divu_i64:
125
case INDEX_op_sub_i32:
126
+ case INDEX_op_sub_i64:
127
case INDEX_op_and_i32:
128
+ case INDEX_op_and_i64:
129
case INDEX_op_andc_i32:
130
+ case INDEX_op_andc_i64:
131
case INDEX_op_or_i32:
132
+ case INDEX_op_or_i64:
133
case INDEX_op_orc_i32:
134
+ case INDEX_op_orc_i64:
135
case INDEX_op_xor_i32:
136
+ case INDEX_op_xor_i64:
137
case INDEX_op_shl_i32:
138
+ case INDEX_op_shl_i64:
139
case INDEX_op_shr_i32:
140
+ case INDEX_op_shr_i64:
141
case INDEX_op_sar_i32:
142
+ case INDEX_op_sar_i64:
143
case INDEX_op_setcond_i32:
144
+ case INDEX_op_setcond_i64:
145
return C_O1_I2(r, rZ, rJ);
146
147
case INDEX_op_brcond_i32:
148
+ case INDEX_op_brcond_i64:
149
return C_O0_I2(rZ, rJ);
150
case INDEX_op_movcond_i32:
151
+ case INDEX_op_movcond_i64:
152
return C_O1_I4(r, rZ, rJ, rI, 0);
153
case INDEX_op_add2_i32:
154
+ case INDEX_op_add2_i64:
155
case INDEX_op_sub2_i32:
156
+ case INDEX_op_sub2_i64:
157
return C_O2_I4(r, r, rZ, rZ, rJ, rJ);
158
case INDEX_op_mulu2_i32:
159
case INDEX_op_muls2_i32:
160
return C_O2_I2(r, r, rZ, rJ);
161
-
162
- case INDEX_op_ld8u_i64:
163
- case INDEX_op_ld8s_i64:
164
- case INDEX_op_ld16u_i64:
165
- case INDEX_op_ld16s_i64:
166
- case INDEX_op_ld32u_i64:
167
- case INDEX_op_ld32s_i64:
168
- case INDEX_op_ld_i64:
169
- case INDEX_op_ext_i32_i64:
170
- case INDEX_op_extu_i32_i64:
171
- return C_O1_I1(R, r);
172
-
173
- case INDEX_op_st8_i64:
174
- case INDEX_op_st16_i64:
175
- case INDEX_op_st32_i64:
176
- case INDEX_op_st_i64:
177
- return C_O0_I2(RZ, r);
178
-
179
- case INDEX_op_add_i64:
180
- case INDEX_op_mul_i64:
181
- case INDEX_op_div_i64:
182
- case INDEX_op_divu_i64:
183
- case INDEX_op_sub_i64:
184
- case INDEX_op_and_i64:
185
- case INDEX_op_andc_i64:
186
- case INDEX_op_or_i64:
187
- case INDEX_op_orc_i64:
188
- case INDEX_op_xor_i64:
189
- case INDEX_op_shl_i64:
190
- case INDEX_op_shr_i64:
191
- case INDEX_op_sar_i64:
192
- case INDEX_op_setcond_i64:
193
- return C_O1_I2(R, RZ, RJ);
194
-
195
- case INDEX_op_neg_i64:
196
- case INDEX_op_not_i64:
197
- case INDEX_op_ext32s_i64:
198
- case INDEX_op_ext32u_i64:
199
- return C_O1_I1(R, R);
200
-
201
- case INDEX_op_extrl_i64_i32:
202
- case INDEX_op_extrh_i64_i32:
203
- return C_O1_I1(r, R);
204
-
205
- case INDEX_op_brcond_i64:
206
- return C_O0_I2(RZ, RJ);
207
- case INDEX_op_movcond_i64:
208
- return C_O1_I4(R, RZ, RJ, RI, 0);
209
- case INDEX_op_add2_i64:
210
- case INDEX_op_sub2_i64:
211
- return C_O2_I4(R, R, RZ, RZ, RJ, RI);
212
case INDEX_op_muluh_i64:
213
- return C_O1_I2(R, R, R);
214
+ return C_O1_I2(r, r, r);
215
216
case INDEX_op_qemu_ld_i32:
217
- return C_O1_I1(r, A);
218
case INDEX_op_qemu_ld_i64:
219
- return C_O1_I1(R, A);
220
+ return C_O1_I1(r, s);
221
case INDEX_op_qemu_st_i32:
222
- return C_O0_I2(sZ, A);
223
case INDEX_op_qemu_st_i64:
224
- return C_O0_I2(SZ, A);
225
+ return C_O0_I2(sZ, s);
226
227
default:
228
g_assert_not_reached();
229
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
230
#endif
231
232
tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
233
- tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS64;
234
+ tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
235
236
tcg_target_call_clobber_regs = 0;
237
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G1);
238
--
154
--
239
2.34.1
155
2.34.1
New patch
1
With FEAT_LSE2, LDP/STP suffices. Without FEAT_LSE2, use LDXP+STXP when
16-byte atomicity is required and LDP/STP otherwise.
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/aarch64/tcg-target-con-set.h | 2 +
8
tcg/aarch64/tcg-target.h | 11 ++-
9
tcg/aarch64/tcg-target.c.inc | 141 ++++++++++++++++++++++++++++++-
10
3 files changed, 151 insertions(+), 3 deletions(-)
11
12
diff --git a/tcg/aarch64/tcg-target-con-set.h b/tcg/aarch64/tcg-target-con-set.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/aarch64/tcg-target-con-set.h
15
+++ b/tcg/aarch64/tcg-target-con-set.h
16
@@ -XXX,XX +XXX,XX @@ C_O0_I1(r)
17
C_O0_I2(r, rA)
18
C_O0_I2(rZ, r)
19
C_O0_I2(w, r)
20
+C_O0_I3(rZ, rZ, r)
21
C_O1_I1(r, r)
22
C_O1_I1(w, r)
23
C_O1_I1(w, w)
24
@@ -XXX,XX +XXX,XX @@ C_O1_I2(w, w, wO)
25
C_O1_I2(w, w, wZ)
26
C_O1_I3(w, w, w, w)
27
C_O1_I4(r, r, rA, rZ, rZ)
28
+C_O2_I1(r, r, r)
29
C_O2_I4(r, r, rZ, rZ, rA, rMZ)
30
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
31
index XXXXXXX..XXXXXXX 100644
32
--- a/tcg/aarch64/tcg-target.h
33
+++ b/tcg/aarch64/tcg-target.h
34
@@ -XXX,XX +XXX,XX @@ typedef enum {
35
#define TCG_TARGET_HAS_muluh_i64 1
36
#define TCG_TARGET_HAS_mulsh_i64 1
37
38
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
39
+/*
40
+ * Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load,
41
+ * which requires writable pages. We must defer to the helper for user-only,
42
+ * but in system mode all ram is writable for the host.
43
+ */
44
+#ifdef CONFIG_USER_ONLY
45
+#define TCG_TARGET_HAS_qemu_ldst_i128 have_lse2
46
+#else
47
+#define TCG_TARGET_HAS_qemu_ldst_i128 1
48
+#endif
49
50
#define TCG_TARGET_HAS_v64 1
51
#define TCG_TARGET_HAS_v128 1
52
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
53
index XXXXXXX..XXXXXXX 100644
54
--- a/tcg/aarch64/tcg-target.c.inc
55
+++ b/tcg/aarch64/tcg-target.c.inc
56
@@ -XXX,XX +XXX,XX @@ typedef enum {
57
I3305_LDR_v64 = 0x5c000000,
58
I3305_LDR_v128 = 0x9c000000,
59
60
+ /* Load/store exclusive. */
61
+ I3306_LDXP = 0xc8600000,
62
+ I3306_STXP = 0xc8200000,
63
+
64
/* Load/store register. Described here as 3.3.12, but the helper
65
that emits them can transform to 3.3.10 or 3.3.13. */
66
I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
67
@@ -XXX,XX +XXX,XX @@ typedef enum {
68
I3406_ADR = 0x10000000,
69
I3406_ADRP = 0x90000000,
70
71
+ /* Add/subtract extended register instructions. */
72
+ I3501_ADD = 0x0b200000,
73
+
74
/* Add/subtract shifted register instructions (without a shift). */
75
I3502_ADD = 0x0b000000,
76
I3502_ADDS = 0x2b000000,
77
@@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
78
tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
79
}
80
81
+static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
82
+ TCGReg rt, TCGReg rt2, TCGReg rn)
83
+{
84
+ tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
85
+}
86
+
87
static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
88
TCGReg rt, int imm19)
89
{
90
@@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
91
tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
92
}
93
94
+static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
95
+ TCGType sf, TCGReg rd, TCGReg rn,
96
+ TCGReg rm, int opt, int imm3)
97
+{
98
+ tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
99
+ imm3 << 10 | rn << 5 | rd);
100
+}
101
+
102
/* This function is for both 3.5.2 (Add/Subtract shifted register), for
103
the rare occasion when we actually want to supply a shift amount. */
104
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
105
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
106
TCGType addr_type = s->addr_type;
107
TCGLabelQemuLdst *ldst = NULL;
108
MemOp opc = get_memop(oi);
109
+ MemOp s_bits = opc & MO_SIZE;
110
unsigned a_mask;
111
112
h->aa = atom_and_align_for_opc(s, opc,
113
have_lse2 ? MO_ATOM_WITHIN16
114
: MO_ATOM_IFALIGN,
115
- false);
116
+ s_bits == MO_128);
117
a_mask = (1 << h->aa.align) - 1;
118
119
#ifdef CONFIG_SOFTMMU
120
- unsigned s_bits = opc & MO_SIZE;
121
unsigned s_mask = (1u << s_bits) - 1;
122
unsigned mem_index = get_mmuidx(oi);
123
TCGReg addr_adj;
124
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
125
}
126
}
127
128
+static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
129
+ TCGReg addr_reg, MemOpIdx oi, bool is_ld)
130
+{
131
+ TCGLabelQemuLdst *ldst;
132
+ HostAddress h;
133
+ TCGReg base;
134
+ bool use_pair;
135
+
136
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
137
+
138
+ /* Compose the final address, as LDP/STP have no indexing. */
139
+ if (h.index == TCG_REG_XZR) {
140
+ base = h.base;
141
+ } else {
142
+ base = TCG_REG_TMP2;
143
+ if (h.index_ext == TCG_TYPE_I32) {
144
+ /* add base, base, index, uxtw */
145
+ tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
146
+ h.base, h.index, MO_32, 0);
147
+ } else {
148
+ /* add base, base, index */
149
+ tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
150
+ }
151
+ }
152
+
153
+ use_pair = h.aa.atom < MO_128 || have_lse2;
154
+
155
+ if (!use_pair) {
156
+ tcg_insn_unit *branch = NULL;
157
+ TCGReg ll, lh, sl, sh;
158
+
159
+ /*
160
+ * If we have already checked for 16-byte alignment, that's all
161
+ * we need. Otherwise we have determined that misaligned atomicity
162
+ * may be handled with two 8-byte loads.
163
+ */
164
+ if (h.aa.align < MO_128) {
165
+ /*
166
+ * TODO: align should be MO_64, so we only need test bit 3,
167
+ * which means we could use TBNZ instead of ANDS+B_C.
168
+ */
169
+ tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
170
+ branch = s->code_ptr;
171
+ tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
172
+ use_pair = true;
173
+ }
174
+
175
+ if (is_ld) {
176
+ /*
177
+ * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
178
+ * ldxp lo, hi, [base]
179
+ * stxp t0, lo, hi, [base]
180
+ * cbnz t0, .-8
181
+ * Require no overlap between data{lo,hi} and base.
182
+ */
183
+ if (datalo == base || datahi == base) {
184
+ tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
185
+ base = TCG_REG_TMP2;
186
+ }
187
+ ll = sl = datalo;
188
+ lh = sh = datahi;
189
+ } else {
190
+ /*
191
+ * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
192
+ * 1: ldxp t0, t1, [base]
193
+ * stxp t0, lo, hi, [base]
194
+ * cbnz t0, 1b
195
+ */
196
+ tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
197
+ ll = TCG_REG_TMP0;
198
+ lh = TCG_REG_TMP1;
199
+ sl = datalo;
200
+ sh = datahi;
201
+ }
202
+
203
+ tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
204
+ tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
205
+ tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);
206
+
207
+ if (use_pair) {
208
+ /* "b .+8", branching across the one insn of use_pair. */
209
+ tcg_out_insn(s, 3206, B, 2);
210
+ reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
211
+ }
212
+ }
213
+
214
+ if (use_pair) {
215
+ if (is_ld) {
216
+ tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
217
+ } else {
218
+ tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
219
+ }
220
+ }
221
+
222
+ if (ldst) {
223
+ ldst->type = TCG_TYPE_I128;
224
+ ldst->datalo_reg = datalo;
225
+ ldst->datahi_reg = datahi;
226
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
227
+ }
228
+}
229
+
230
static const tcg_insn_unit *tb_ret_addr;
231
232
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
233
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
234
case INDEX_op_qemu_st_a64_i64:
235
tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
236
break;
237
+ case INDEX_op_qemu_ld_a32_i128:
238
+ case INDEX_op_qemu_ld_a64_i128:
239
+ tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
240
+ break;
241
+ case INDEX_op_qemu_st_a32_i128:
242
+ case INDEX_op_qemu_st_a64_i128:
243
+ tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
244
+ break;
245
246
case INDEX_op_bswap64_i64:
247
tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
248
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
249
case INDEX_op_qemu_ld_a32_i64:
250
case INDEX_op_qemu_ld_a64_i64:
251
return C_O1_I1(r, r);
252
+ case INDEX_op_qemu_ld_a32_i128:
253
+ case INDEX_op_qemu_ld_a64_i128:
254
+ return C_O2_I1(r, r, r);
255
case INDEX_op_qemu_st_a32_i32:
256
case INDEX_op_qemu_st_a64_i32:
257
case INDEX_op_qemu_st_a32_i64:
258
case INDEX_op_qemu_st_a64_i64:
259
return C_O0_I2(rZ, r);
260
+ case INDEX_op_qemu_st_a32_i128:
261
+ case INDEX_op_qemu_st_a64_i128:
262
+ return C_O0_I3(rZ, rZ, r);
263
264
case INDEX_op_deposit_i32:
265
case INDEX_op_deposit_i64:
266
--
267
2.34.1
1
The helpers for reset_rf, cli, sti, clac, stac are
1
Use LQ/STQ with ISA v2.07, and 16-byte atomicity is required.
2
completely trivial; implement them inline.
2
Note that these instructions do not require 16-byte alignment.
3
3
4
Drop some nearby #if 0 code.
4
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
5
6
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
6
---
10
target/i386/helper.h | 5 -----
7
tcg/ppc/tcg-target-con-set.h | 2 +
11
target/i386/tcg/cc_helper.c | 41 -------------------------------------
8
tcg/ppc/tcg-target-con-str.h | 1 +
12
target/i386/tcg/translate.c | 30 ++++++++++++++++++++++-----
9
tcg/ppc/tcg-target.h | 3 +-
13
3 files changed, 25 insertions(+), 51 deletions(-)
10
tcg/ppc/tcg-target.c.inc | 108 +++++++++++++++++++++++++++++++----
11
4 files changed, 101 insertions(+), 13 deletions(-)
14
12
15
diff --git a/target/i386/helper.h b/target/i386/helper.h
13
diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h
16
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
17
--- a/target/i386/helper.h
15
--- a/tcg/ppc/tcg-target-con-set.h
18
+++ b/target/i386/helper.h
16
+++ b/tcg/ppc/tcg-target-con-set.h
19
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_2(syscall, void, env, int)
17
@@ -XXX,XX +XXX,XX @@ C_O0_I2(r, r)
20
DEF_HELPER_2(sysret, void, env, int)
18
C_O0_I2(r, ri)
19
C_O0_I2(v, r)
20
C_O0_I3(r, r, r)
21
+C_O0_I3(o, m, r)
22
C_O0_I4(r, r, ri, ri)
23
C_O0_I4(r, r, r, r)
24
C_O1_I1(r, r)
25
@@ -XXX,XX +XXX,XX @@ C_O1_I3(v, v, v, v)
26
C_O1_I4(r, r, ri, rZ, rZ)
27
C_O1_I4(r, r, r, ri, ri)
28
C_O2_I1(r, r, r)
29
+C_O2_I1(o, m, r)
30
C_O2_I2(r, r, r, r)
31
C_O2_I4(r, r, rI, rZM, r, r)
32
C_O2_I4(r, r, r, r, rI, rZM)
33
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
34
index XXXXXXX..XXXXXXX 100644
35
--- a/tcg/ppc/tcg-target-con-str.h
36
+++ b/tcg/ppc/tcg-target-con-str.h
37
@@ -XXX,XX +XXX,XX @@
38
* REGS(letter, register_mask)
39
*/
40
REGS('r', ALL_GENERAL_REGS)
41
+REGS('o', ALL_GENERAL_REGS & 0xAAAAAAAAu) /* odd registers */
42
REGS('v', ALL_VECTOR_REGS)
43
44
/*
45
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
46
index XXXXXXX..XXXXXXX 100644
47
--- a/tcg/ppc/tcg-target.h
48
+++ b/tcg/ppc/tcg-target.h
49
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
50
#define TCG_TARGET_HAS_mulsh_i64 1
21
#endif
51
#endif
22
DEF_HELPER_FLAGS_2(pause, TCG_CALL_NO_WG, noreturn, env, int)
52
23
-DEF_HELPER_1(reset_rf, void, env)
53
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
24
DEF_HELPER_FLAGS_3(raise_interrupt, TCG_CALL_NO_WG, noreturn, env, int, int)
54
+#define TCG_TARGET_HAS_qemu_ldst_i128 \
25
DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, int)
55
+ (TCG_TARGET_REG_BITS == 64 && have_isa_2_07)
26
-DEF_HELPER_1(cli, void, env)
56
27
-DEF_HELPER_1(sti, void, env)
57
/*
28
-DEF_HELPER_1(clac, void, env)
58
* While technically Altivec could support V64, it has no 64-bit store
29
-DEF_HELPER_1(stac, void, env)
59
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
30
DEF_HELPER_3(boundw, void, env, tl, int)
60
index XXXXXXX..XXXXXXX 100644
31
DEF_HELPER_3(boundl, void, env, tl, int)
61
--- a/tcg/ppc/tcg-target.c.inc
32
62
+++ b/tcg/ppc/tcg-target.c.inc
33
diff --git a/target/i386/tcg/cc_helper.c b/target/i386/tcg/cc_helper.c
63
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
34
index XXXXXXX..XXXXXXX 100644
64
35
--- a/target/i386/tcg/cc_helper.c
65
#define B OPCD( 18)
36
+++ b/target/i386/tcg/cc_helper.c
66
#define BC OPCD( 16)
37
@@ -XXX,XX +XXX,XX @@ void helper_clts(CPUX86State *env)
67
+
38
env->cr[0] &= ~CR0_TS_MASK;
68
#define LBZ OPCD( 34)
39
env->hflags &= ~HF_TS_MASK;
69
#define LHZ OPCD( 40)
70
#define LHA OPCD( 42)
71
#define LWZ OPCD( 32)
72
#define LWZUX XO31( 55)
73
-#define STB OPCD( 38)
74
-#define STH OPCD( 44)
75
-#define STW OPCD( 36)
76
-
77
-#define STD XO62( 0)
78
-#define STDU XO62( 1)
79
-#define STDX XO31(149)
80
-
81
#define LD XO58( 0)
82
#define LDX XO31( 21)
83
#define LDU XO58( 1)
84
#define LDUX XO31( 53)
85
#define LWA XO58( 2)
86
#define LWAX XO31(341)
87
+#define LQ OPCD( 56)
88
+
89
+#define STB OPCD( 38)
90
+#define STH OPCD( 44)
91
+#define STW OPCD( 36)
92
+#define STD XO62( 0)
93
+#define STDU XO62( 1)
94
+#define STDX XO31(149)
95
+#define STQ XO62( 2)
96
97
#define ADDIC OPCD( 12)
98
#define ADDI OPCD( 14)
99
@@ -XXX,XX +XXX,XX @@ typedef struct {
100
101
bool tcg_target_has_memory_bswap(MemOp memop)
102
{
103
- return true;
104
+ TCGAtomAlign aa;
105
+
106
+ if ((memop & MO_SIZE) <= MO_64) {
107
+ return true;
108
+ }
109
+
110
+ /*
111
+ * Reject 16-byte memop with 16-byte atomicity,
112
+ * but do allow a pair of 64-bit operations.
113
+ */
114
+ aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
115
+ return aa.atom <= MO_64;
40
}
116
}
41
-
117
42
-void helper_reset_rf(CPUX86State *env)
118
/*
43
-{
119
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
44
- env->eflags &= ~RF_MASK;
120
{
45
-}
121
TCGLabelQemuLdst *ldst = NULL;
46
-
122
MemOp opc = get_memop(oi);
47
-void helper_cli(CPUX86State *env)
123
- MemOp a_bits;
48
-{
124
+ MemOp a_bits, s_bits;
49
- env->eflags &= ~IF_MASK;
125
50
-}
126
/*
51
-
127
* Book II, Section 1.4, Single-Copy Atomicity, specifies:
52
-void helper_sti(CPUX86State *env)
128
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
53
-{
129
* As of 3.0, "the non-atomic access is performed as described in
54
- env->eflags |= IF_MASK;
130
* the corresponding list", which matches MO_ATOM_SUBALIGN.
55
-}
131
*/
56
-
132
+ s_bits = opc & MO_SIZE;
57
-void helper_clac(CPUX86State *env)
133
h->aa = atom_and_align_for_opc(s, opc,
58
-{
134
have_isa_3_00 ? MO_ATOM_SUBALIGN
59
- env->eflags &= ~AC_MASK;
135
: MO_ATOM_IFALIGN,
60
-}
136
- false);
61
-
137
+ s_bits == MO_128);
62
-void helper_stac(CPUX86State *env)
138
a_bits = h->aa.align;
63
-{
139
64
- env->eflags |= AC_MASK;
140
#ifdef CONFIG_SOFTMMU
65
-}
141
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
66
-
142
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
67
-#if 0
143
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
68
-/* vm86plus instructions */
144
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
69
-void helper_cli_vm(CPUX86State *env)
145
- unsigned s_bits = opc & MO_SIZE;
70
-{
146
71
- env->eflags &= ~VIF_MASK;
147
ldst = new_ldst_label(s);
72
-}
148
ldst->is_ld = is_ld;
73
-
149
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
74
-void helper_sti_vm(CPUX86State *env)
75
-{
76
- env->eflags |= VIF_MASK;
77
- if (env->eflags & VIP_MASK) {
78
- raise_exception_ra(env, EXCP0D_GPF, GETPC());
79
- }
80
-}
81
-#endif
82
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
83
index XXXXXXX..XXXXXXX 100644
84
--- a/target/i386/tcg/translate.c
85
+++ b/target/i386/tcg/translate.c
86
@@ -XXX,XX +XXX,XX @@ static void gen_reset_hflag(DisasContext *s, uint32_t mask)
87
}
150
}
88
}
151
}
89
152
90
+static void gen_set_eflags(DisasContext *s, target_ulong mask)
153
+static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
154
+ TCGReg addr_reg, MemOpIdx oi, bool is_ld)
91
+{
155
+{
92
+ TCGv t = tcg_temp_new();
156
+ TCGLabelQemuLdst *ldst;
93
+
157
+ HostAddress h;
94
+ tcg_gen_ld_tl(t, cpu_env, offsetof(CPUX86State, eflags));
158
+ bool need_bswap;
95
+ tcg_gen_ori_tl(t, t, mask);
159
+ uint32_t insn;
96
+ tcg_gen_st_tl(t, cpu_env, offsetof(CPUX86State, eflags));
160
+ TCGReg index;
97
+ tcg_temp_free(t);
161
+
162
+ ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld);
163
+
164
+ /* Compose the final address, as LQ/STQ have no indexing. */
165
+ index = h.index;
166
+ if (h.base != 0) {
167
+ index = TCG_REG_TMP1;
168
+ tcg_out32(s, ADD | TAB(index, h.base, h.index));
169
+ }
170
+ need_bswap = get_memop(oi) & MO_BSWAP;
171
+
172
+ if (h.aa.atom == MO_128) {
173
+ tcg_debug_assert(!need_bswap);
174
+ tcg_debug_assert(datalo & 1);
175
+ tcg_debug_assert(datahi == datalo - 1);
176
+ insn = is_ld ? LQ : STQ;
177
+ tcg_out32(s, insn | TAI(datahi, index, 0));
178
+ } else {
179
+ TCGReg d1, d2;
180
+
181
+ if (HOST_BIG_ENDIAN ^ need_bswap) {
182
+ d1 = datahi, d2 = datalo;
183
+ } else {
184
+ d1 = datalo, d2 = datahi;
185
+ }
186
+
187
+ if (need_bswap) {
188
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
189
+ insn = is_ld ? LDBRX : STDBRX;
190
+ tcg_out32(s, insn | TAB(d1, 0, index));
191
+ tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0));
192
+ } else {
193
+ insn = is_ld ? LD : STD;
194
+ tcg_out32(s, insn | TAI(d1, index, 0));
195
+ tcg_out32(s, insn | TAI(d2, index, 8));
196
+ }
197
+ }
198
+
199
+ if (ldst) {
200
+ ldst->type = TCG_TYPE_I128;
201
+ ldst->datalo_reg = datalo;
202
+ ldst->datahi_reg = datahi;
203
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
204
+ }
98
+}
205
+}
99
+
206
+
100
+static void gen_reset_eflags(DisasContext *s, target_ulong mask)
207
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
101
+{
102
+ TCGv t = tcg_temp_new();
103
+
104
+ tcg_gen_ld_tl(t, cpu_env, offsetof(CPUX86State, eflags));
105
+ tcg_gen_andi_tl(t, t, ~mask);
106
+ tcg_gen_st_tl(t, cpu_env, offsetof(CPUX86State, eflags));
107
+ tcg_temp_free(t);
108
+}
109
+
110
/* Clear BND registers during legacy branches. */
111
static void gen_bnd_jmp(DisasContext *s)
112
{
208
{
113
@@ -XXX,XX +XXX,XX @@ do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
209
int i;
114
}
210
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
115
211
args[4], TCG_TYPE_I64);
116
if (s->base.tb->flags & HF_RF_MASK) {
117
- gen_helper_reset_rf(cpu_env);
118
+ gen_reset_eflags(s, RF_MASK);
119
}
120
if (recheck_tf) {
121
gen_helper_rechecking_single_step(cpu_env);
122
@@ -XXX,XX +XXX,XX @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
123
#endif
124
case 0xfa: /* cli */
125
if (check_iopl(s)) {
126
- gen_helper_cli(cpu_env);
127
+ gen_reset_eflags(s, IF_MASK);
128
}
212
}
129
break;
213
break;
130
case 0xfb: /* sti */
214
+ case INDEX_op_qemu_ld_a32_i128:
131
if (check_iopl(s)) {
215
+ case INDEX_op_qemu_ld_a64_i128:
132
- gen_helper_sti(cpu_env);
216
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
133
+ gen_set_eflags(s, IF_MASK);
217
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
134
/* interruptions are enabled only the first insn after sti */
218
+ break;
135
gen_update_eip_next(s);
219
136
gen_eob_inhibit_irq(s, true);
220
case INDEX_op_qemu_st_a64_i32:
137
@@ -XXX,XX +XXX,XX @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
221
if (TCG_TARGET_REG_BITS == 32) {
138
|| CPL(s) != 0) {
222
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
139
goto illegal_op;
223
args[4], TCG_TYPE_I64);
140
}
224
}
141
- gen_helper_clac(cpu_env);
225
break;
142
+ gen_reset_eflags(s, AC_MASK);
226
+ case INDEX_op_qemu_st_a32_i128:
143
s->base.is_jmp = DISAS_EOB_NEXT;
227
+ case INDEX_op_qemu_st_a64_i128:
144
break;
228
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
145
229
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
146
@@ -XXX,XX +XXX,XX @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
230
+ break;
147
|| CPL(s) != 0) {
231
148
goto illegal_op;
232
case INDEX_op_setcond_i32:
149
}
233
tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
150
- gen_helper_stac(cpu_env);
234
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
151
+ gen_set_eflags(s, AC_MASK);
235
case INDEX_op_qemu_st_a64_i64:
152
s->base.is_jmp = DISAS_EOB_NEXT;
236
return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
153
break;
237
154
238
+ case INDEX_op_qemu_ld_a32_i128:
239
+ case INDEX_op_qemu_ld_a64_i128:
240
+ return C_O2_I1(o, m, r);
241
+ case INDEX_op_qemu_st_a32_i128:
242
+ case INDEX_op_qemu_st_a64_i128:
243
+ return C_O0_I3(o, m, r);
244
+
245
case INDEX_op_add_vec:
246
case INDEX_op_sub_vec:
247
case INDEX_op_mul_vec:
155
--
248
--
156
2.34.1
249
2.34.1
157
158
1
Avoid cpu_restore_state, and modifying env->eip out from
underneath the translator with TARGET_TB_PCREL. There is
some slight duplication from x86_restore_state_to_opc,
but it's just a few lines.

Use LPQ/STPQ when 16-byte atomicity is required.
Note that these instructions require 16-byte alignment.
5
3
6
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1269
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Claudio Fontana <cfontana@suse.de>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
6
---
10
target/i386/helper.c | 21 +++++++++++++++++++--
7
tcg/s390x/tcg-target-con-set.h | 2 +
11
1 file changed, 19 insertions(+), 2 deletions(-)
8
tcg/s390x/tcg-target.h | 2 +-
9
tcg/s390x/tcg-target.c.inc | 107 ++++++++++++++++++++++++++++++++-
10
3 files changed, 107 insertions(+), 4 deletions(-)
12
11
13
diff --git a/target/i386/helper.c b/target/i386/helper.c
12
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
14
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
15
--- a/target/i386/helper.c
14
--- a/tcg/s390x/tcg-target-con-set.h
16
+++ b/target/i386/helper.c
15
+++ b/tcg/s390x/tcg-target-con-set.h
17
@@ -XXX,XX +XXX,XX @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank,
16
@@ -XXX,XX +XXX,XX @@ C_O0_I2(r, r)
17
C_O0_I2(r, ri)
18
C_O0_I2(r, rA)
19
C_O0_I2(v, r)
20
+C_O0_I3(o, m, r)
21
C_O1_I1(r, r)
22
C_O1_I1(v, r)
23
C_O1_I1(v, v)
24
@@ -XXX,XX +XXX,XX @@ C_O1_I2(v, v, v)
25
C_O1_I3(v, v, v, v)
26
C_O1_I4(r, r, ri, rI, r)
27
C_O1_I4(r, r, rA, rI, r)
28
+C_O2_I1(o, m, r)
29
C_O2_I2(o, m, 0, r)
30
C_O2_I2(o, m, r, r)
31
C_O2_I3(o, m, 0, 1, r)
32
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tcg/s390x/tcg-target.h
35
+++ b/tcg/s390x/tcg-target.h
36
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
37
#define TCG_TARGET_HAS_muluh_i64 0
38
#define TCG_TARGET_HAS_mulsh_i64 0
39
40
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
41
+#define TCG_TARGET_HAS_qemu_ldst_i128 1
42
43
#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
44
#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
45
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
46
index XXXXXXX..XXXXXXX 100644
47
--- a/tcg/s390x/tcg-target.c.inc
48
+++ b/tcg/s390x/tcg-target.c.inc
49
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
50
RXY_LLGF = 0xe316,
51
RXY_LLGH = 0xe391,
52
RXY_LMG = 0xeb04,
53
+ RXY_LPQ = 0xe38f,
54
RXY_LRV = 0xe31e,
55
RXY_LRVG = 0xe30f,
56
RXY_LRVH = 0xe31f,
57
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
58
RXY_STG = 0xe324,
59
RXY_STHY = 0xe370,
60
RXY_STMG = 0xeb24,
61
+ RXY_STPQ = 0xe38e,
62
RXY_STRV = 0xe33e,
63
RXY_STRVG = 0xe32f,
64
RXY_STRVH = 0xe33f,
65
@@ -XXX,XX +XXX,XX @@ typedef struct {
66
67
bool tcg_target_has_memory_bswap(MemOp memop)
68
{
69
- return true;
70
+ TCGAtomAlign aa;
71
+
72
+ if ((memop & MO_SIZE) <= MO_64) {
73
+ return true;
74
+ }
75
+
76
+ /*
77
+ * Reject 16-byte memop with 16-byte atomicity,
78
+ * but do allow a pair of 64-bit operations.
79
+ */
80
+ aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
81
+ return aa.atom <= MO_64;
82
}
83
84
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
85
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
86
{
87
TCGLabelQemuLdst *ldst = NULL;
88
MemOp opc = get_memop(oi);
89
+ MemOp s_bits = opc & MO_SIZE;
90
unsigned a_mask;
91
92
- h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
93
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
94
a_mask = (1 << h->aa.align) - 1;
95
96
#ifdef CONFIG_SOFTMMU
97
- unsigned s_bits = opc & MO_SIZE;
98
unsigned s_mask = (1 << s_bits) - 1;
99
int mem_index = get_mmuidx(oi);
100
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
101
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
18
}
102
}
19
}
103
}
20
104
21
+static target_ulong get_memio_eip(CPUX86State *env)
105
+static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
106
+ TCGReg addr_reg, MemOpIdx oi, bool is_ld)
22
+{
107
+{
23
+ uint64_t data[TARGET_INSN_START_WORDS];
108
+ TCGLabel *l1 = NULL, *l2 = NULL;
24
+ CPUState *cs = env_cpu(env);
109
+ TCGLabelQemuLdst *ldst;
25
+
110
+ HostAddress h;
26
+ if (!cpu_unwind_state_data(cs, cs->mem_io_pc, data)) {
111
+ bool need_bswap;
27
+ return env->eip;
112
+ bool use_pair;
28
+ }
113
+ S390Opcode insn;
29
+
114
+
30
+ /* Per x86_restore_state_to_opc. */
115
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
31
+ if (TARGET_TB_PCREL) {
116
+
32
+ return (env->eip & TARGET_PAGE_MASK) | data[0];
117
+ use_pair = h.aa.atom < MO_128;
33
+ } else {
118
+ need_bswap = get_memop(oi) & MO_BSWAP;
34
+ return data[0] - env->segs[R_CS].base;
119
+
120
+ if (!use_pair) {
121
+ /*
122
+ * Atomicity requires we use LPQ. If we've already checked for
123
+ * 16-byte alignment, that's all we need. If we arrive with
124
+ * lesser alignment, we have determined that less than 16-byte
125
+ * alignment can be satisfied with two 8-byte loads.
126
+ */
127
+ if (h.aa.align < MO_128) {
128
+ use_pair = true;
129
+ l1 = gen_new_label();
130
+ l2 = gen_new_label();
131
+
132
+ tcg_out_insn(s, RI, TMLL, addr_reg, 15);
133
+ tgen_branch(s, 7, l1); /* CC in {1,2,3} */
134
+ }
135
+
136
+ tcg_debug_assert(!need_bswap);
137
+ tcg_debug_assert(datalo & 1);
138
+ tcg_debug_assert(datahi == datalo - 1);
139
+ insn = is_ld ? RXY_LPQ : RXY_STPQ;
140
+ tcg_out_insn_RXY(s, insn, datahi, h.base, h.index, h.disp);
141
+
142
+ if (use_pair) {
143
+ tgen_branch(s, S390_CC_ALWAYS, l2);
144
+ tcg_out_label(s, l1);
145
+ }
146
+ }
147
+ if (use_pair) {
148
+ TCGReg d1, d2;
149
+
150
+ if (need_bswap) {
151
+ d1 = datalo, d2 = datahi;
152
+ insn = is_ld ? RXY_LRVG : RXY_STRVG;
153
+ } else {
154
+ d1 = datahi, d2 = datalo;
155
+ insn = is_ld ? RXY_LG : RXY_STG;
156
+ }
157
+
158
+ if (h.base == d1 || h.index == d1) {
159
+ tcg_out_insn(s, RXY, LAY, TCG_TMP0, h.base, h.index, h.disp);
160
+ h.base = TCG_TMP0;
161
+ h.index = TCG_REG_NONE;
162
+ h.disp = 0;
163
+ }
164
+ tcg_out_insn_RXY(s, insn, d1, h.base, h.index, h.disp);
165
+ tcg_out_insn_RXY(s, insn, d2, h.base, h.index, h.disp + 8);
166
+ }
167
+ if (l2) {
168
+ tcg_out_label(s, l2);
169
+ }
170
+
171
+ if (ldst) {
172
+ ldst->type = TCG_TYPE_I128;
173
+ ldst->datalo_reg = datalo;
174
+ ldst->datahi_reg = datahi;
175
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
35
+ }
176
+ }
36
+}
177
+}
37
+
178
+
38
void cpu_report_tpr_access(CPUX86State *env, TPRAccess access)
179
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
39
{
180
{
40
X86CPU *cpu = env_archcpu(env);
181
/* Reuse the zeroing that exists for goto_ptr. */
41
@@ -XXX,XX +XXX,XX @@ void cpu_report_tpr_access(CPUX86State *env, TPRAccess access)
182
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
42
183
case INDEX_op_qemu_st_a64_i64:
43
cpu_interrupt(cs, CPU_INTERRUPT_TPR);
184
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
44
} else if (tcg_enabled()) {
185
break;
45
- cpu_restore_state(cs, cs->mem_io_pc, false);
186
+ case INDEX_op_qemu_ld_a32_i128:
46
+ target_ulong eip = get_memio_eip(env);
187
+ case INDEX_op_qemu_ld_a64_i128:
47
188
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
48
- apic_handle_tpr_access_report(cpu->apic_state, env->eip, access);
189
+ break;
49
+ apic_handle_tpr_access_report(cpu->apic_state, eip, access);
190
+ case INDEX_op_qemu_st_a32_i128:
50
}
191
+ case INDEX_op_qemu_st_a64_i128:
51
}
192
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
52
#endif /* !CONFIG_USER_ONLY */
193
+ break;
194
195
case INDEX_op_ld16s_i64:
196
tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
197
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
198
case INDEX_op_qemu_st_a32_i32:
199
case INDEX_op_qemu_st_a64_i32:
200
return C_O0_I2(r, r);
201
+ case INDEX_op_qemu_ld_a32_i128:
202
+ case INDEX_op_qemu_ld_a64_i128:
203
+ return C_O2_I1(o, m, r);
204
+ case INDEX_op_qemu_st_a32_i128:
205
+ case INDEX_op_qemu_st_a64_i128:
206
+ return C_O0_I3(o, m, r);
207
208
case INDEX_op_deposit_i32:
209
case INDEX_op_deposit_i64:
53
--
210
--
54
2.34.1
211
2.34.1
diff view generated by jsdifflib
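
A quick illustration of the calling pattern used by get_memio_eip() in the hunk above: ask cpu_unwind_state_data() for the raw unwind words and derive the guest PC from data[0], without restoring anything into env. The helper name below is invented for the example; only the API usage mirrors the code above, and data[0] is described as the same unwind word handed to restore_state_to_opc().

    /*
     * Hypothetical helper, for illustration only: report the guest PC for a
     * memory access at host address 'retaddr' without restoring state to env.
     */
    static target_ulong peek_unwound_eip(CPUState *cs, CPUX86State *env,
                                         uintptr_t retaddr)
    {
        uint64_t data[TARGET_INSN_START_WORDS];

        if (!cpu_unwind_state_data(cs, retaddr, data)) {
            return env->eip;                  /* not in translated code */
        }
        if (TARGET_TB_PCREL) {
            /* Only the offset within the page is recorded. */
            return (env->eip & TARGET_PAGE_MASK) | data[0];
        }
        /* Otherwise data[0] includes the CS base; strip it to get eip. */
        return data[0] - env->segs[R_CS].base;
    }
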
1
The value passed is always true.
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
3
Reviewed-by: Claudio Fontana <cfontana@suse.de>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
3
---
6
accel/tcg/internal.h | 2 +-
4
.../generic/host/load-extract-al16-al8.h | 45 +++++++++++++++++++
7
accel/tcg/tb-maint.c | 4 ++--
5
accel/tcg/ldst_atomicity.c.inc | 36 +--------------
8
accel/tcg/translate-all.c | 15 +++++++--------
6
2 files changed, 47 insertions(+), 34 deletions(-)
9
3 files changed, 10 insertions(+), 11 deletions(-)
7
create mode 100644 host/include/generic/host/load-extract-al16-al8.h
10
8
11
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
9
diff --git a/host/include/generic/host/load-extract-al16-al8.h b/host/include/generic/host/load-extract-al16-al8.h
10
new file mode 100644
11
index XXXXXXX..XXXXXXX
12
--- /dev/null
13
+++ b/host/include/generic/host/load-extract-al16-al8.h
14
@@ -XXX,XX +XXX,XX @@
15
+/*
16
+ * SPDX-License-Identifier: GPL-2.0-or-later
17
+ * Atomic extract 64 from 128-bit, generic version.
18
+ *
19
+ * Copyright (C) 2023 Linaro, Ltd.
20
+ */
21
+
22
+#ifndef HOST_LOAD_EXTRACT_AL16_AL8_H
23
+#define HOST_LOAD_EXTRACT_AL16_AL8_H
24
+
25
+/**
26
+ * load_atom_extract_al16_or_al8:
27
+ * @pv: host address
28
+ * @s: object size in bytes, @s <= 8.
29
+ *
30
+ * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
31
+ * cross a 16-byte boundary then the access must be 16-byte atomic,
32
+ * otherwise the access must be 8-byte atomic.
33
+ */
34
+static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
35
+load_atom_extract_al16_or_al8(void *pv, int s)
36
+{
37
+ uintptr_t pi = (uintptr_t)pv;
38
+ int o = pi & 7;
39
+ int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
40
+ Int128 r;
41
+
42
+ pv = (void *)(pi & ~7);
43
+ if (pi & 8) {
44
+ uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8);
45
+ uint64_t a = qatomic_read__nocheck(p8);
46
+ uint64_t b = qatomic_read__nocheck(p8 + 1);
47
+
48
+ if (HOST_BIG_ENDIAN) {
49
+ r = int128_make128(b, a);
50
+ } else {
51
+ r = int128_make128(a, b);
52
+ }
53
+ } else {
54
+ r = atomic16_read_ro(pv);
55
+ }
56
+ return int128_getlo(int128_urshift(r, shr));
57
+}
58
+
59
+#endif /* HOST_LOAD_EXTRACT_AL16_AL8_H */
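
As a worked example of the extract-and-shift arithmetic in the new header, here is a simplified, non-atomic sketch with plain compiler types. It assumes a little-endian host and gcc/clang's __int128, it deliberately ignores the 8-byte fallback path and the atomicity guarantees of the real helper, and every name in it is invented for illustration.

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /*
     * Extract an s-byte value (s <= 8) that lies inside one aligned 16-byte
     * block, by loading the whole block and shifting.  Not atomic: the real
     * helper does this with atomic16_read_ro() or two 8-byte atomic loads.
     */
    static uint64_t extract_within_16(const uint8_t *p, int s)
    {
        uintptr_t pi = (uintptr_t)p;
        unsigned __int128 block;
        int shr = (pi & 15) * 8;              /* byte offset -> bit shift */

        memcpy(&block, (const void *)(pi & ~(uintptr_t)15), 16);
        return (uint64_t)(block >> shr) &
               (s == 8 ? ~UINT64_C(0) : (UINT64_C(1) << (s * 8)) - 1);
    }

    int main(void)
    {
        _Alignas(16) uint8_t buf[16] = {
            0, 1, 2, 3, 4, 5, 0xaa, 0xbb, 0xcc, 0xdd, 10, 11, 12, 13, 14, 15
        };

        /* 4 bytes starting at offset 6 do not cross the 16-byte boundary. */
        assert(extract_within_16(buf + 6, 4) == UINT64_C(0xddccbbaa));
        return 0;
    }
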
60
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
12
index XXXXXXX..XXXXXXX 100644
61
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/internal.h
62
--- a/accel/tcg/ldst_atomicity.c.inc
14
+++ b/accel/tcg/internal.h
63
+++ b/accel/tcg/ldst_atomicity.c.inc
15
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
64
@@ -XXX,XX +XXX,XX @@
16
tb_page_addr_t phys_page2);
65
* See the COPYING file in the top-level directory.
17
bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc);
66
*/
18
void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
67
19
- uintptr_t host_pc, bool reset_icount);
68
+#include "host/load-extract-al16-al8.h"
20
+ uintptr_t host_pc);
69
+
21
70
#ifdef CONFIG_ATOMIC64
22
/* Return the current PC from CPU, which may be cached in TB. */
71
# define HAVE_al8 true
23
static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
72
#else
24
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
73
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_extract_al16_or_exit(CPUArchState *env, uintptr_t ra,
25
index XXXXXXX..XXXXXXX 100644
74
return int128_getlo(r);
26
--- a/accel/tcg/tb-maint.c
27
+++ b/accel/tcg/tb-maint.c
28
@@ -XXX,XX +XXX,XX @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
29
* restore the CPU state.
30
*/
31
current_tb_modified = true;
32
- cpu_restore_state_from_tb(cpu, current_tb, retaddr, true);
33
+ cpu_restore_state_from_tb(cpu, current_tb, retaddr);
34
}
35
#endif /* TARGET_HAS_PRECISE_SMC */
36
tb_phys_invalidate__locked(tb);
37
@@ -XXX,XX +XXX,XX @@ bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
38
* function to partially restore the CPU state.
39
*/
40
current_tb_modified = true;
41
- cpu_restore_state_from_tb(cpu, current_tb, pc, true);
42
+ cpu_restore_state_from_tb(cpu, current_tb, pc);
43
}
44
#endif /* TARGET_HAS_PRECISE_SMC */
45
tb_phys_invalidate(tb, addr);
46
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/accel/tcg/translate-all.c
49
+++ b/accel/tcg/translate-all.c
50
@@ -XXX,XX +XXX,XX @@ static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
51
}
75
}
52
76
53
/*
77
-/**
54
- * The cpu state corresponding to 'host_pc' is restored.
78
- * load_atom_extract_al16_or_al8:
55
- * When reset_icount is true, current TB will be interrupted and
79
- * @p: host address
56
- * icount should be recalculated.
80
- * @s: object size in bytes, @s <= 8.
57
+ * The cpu state corresponding to 'host_pc' is restored in
81
- *
58
+ * preparation for exiting the TB.
82
- * Load @s bytes from @p, when p % s != 0. If [p, p+s-1] does not
59
*/
83
- * cross an 16-byte boundary then the access must be 16-byte atomic,
60
void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
84
- * otherwise the access must be 8-byte atomic.
61
- uintptr_t host_pc, bool reset_icount)
85
- */
62
+ uintptr_t host_pc)
86
-static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
63
{
87
-load_atom_extract_al16_or_al8(void *pv, int s)
64
uint64_t data[TARGET_INSN_START_WORDS];
88
-{
65
#ifdef CONFIG_PROFILER
89
- uintptr_t pi = (uintptr_t)pv;
66
@@ -XXX,XX +XXX,XX @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
90
- int o = pi & 7;
67
return;
91
- int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
68
}
92
- Int128 r;
69
93
-
70
- if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
94
- pv = (void *)(pi & ~7);
71
+ if (tb_cflags(tb) & CF_USE_ICOUNT) {
95
- if (pi & 8) {
72
assert(icount_enabled());
96
- uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8);
73
/*
97
- uint64_t a = qatomic_read__nocheck(p8);
74
* Reset the cycle counter to the start of the block and
98
- uint64_t b = qatomic_read__nocheck(p8 + 1);
75
@@ -XXX,XX +XXX,XX @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
99
-
76
if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
100
- if (HOST_BIG_ENDIAN) {
77
TranslationBlock *tb = tcg_tb_lookup(host_pc);
101
- r = int128_make128(b, a);
78
if (tb) {
102
- } else {
79
- cpu_restore_state_from_tb(cpu, tb, host_pc, true);
103
- r = int128_make128(a, b);
80
+ cpu_restore_state_from_tb(cpu, tb, host_pc);
104
- }
81
return true;
105
- } else {
82
}
106
- r = atomic16_read_ro(pv);
83
}
107
- }
84
@@ -XXX,XX +XXX,XX @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
108
- return int128_getlo(int128_urshift(r, shr));
85
tb = tcg_tb_lookup(retaddr);
109
-}
86
if (tb) {
110
-
87
/* We can use retranslation to find the PC. */
111
/**
88
- cpu_restore_state_from_tb(cpu, tb, retaddr, true);
112
* load_atom_4_by_2:
89
+ cpu_restore_state_from_tb(cpu, tb, retaddr);
113
* @pv: host address
90
tb_phys_invalidate(tb, -1);
91
} else {
92
/* The exception probably happened in a helper. The CPU state should
93
@@ -XXX,XX +XXX,XX @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
94
cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
95
(void *)retaddr);
96
}
97
- cpu_restore_state_from_tb(cpu, tb, retaddr, true);
98
+ cpu_restore_state_from_tb(cpu, tb, retaddr);
99
100
/*
101
* Some guests must re-execute the branch when re-executing a delay
102
--
114
--
103
2.34.1
115
2.34.1
diff view generated by jsdifflib
1
Add a way to examine the unwind data without actually
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
restoring the data back into env.
3
4
Reviewed-by: Claudio Fontana <cfontana@suse.de>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
3
---
7
accel/tcg/internal.h | 4 +--
4
host/include/generic/host/store-insert-al16.h | 50 +++++++++++++++++++
8
include/exec/exec-all.h | 21 ++++++++---
5
accel/tcg/ldst_atomicity.c.inc | 40 +--------------
9
accel/tcg/translate-all.c | 74 ++++++++++++++++++++++++++-------------
6
2 files changed, 51 insertions(+), 39 deletions(-)
10
3 files changed, 68 insertions(+), 31 deletions(-)
7
create mode 100644 host/include/generic/host/store-insert-al16.h
11
8
12
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
9
diff --git a/host/include/generic/host/store-insert-al16.h b/host/include/generic/host/store-insert-al16.h
13
index XXXXXXX..XXXXXXX 100644
10
new file mode 100644
14
--- a/accel/tcg/internal.h
11
index XXXXXXX..XXXXXXX
15
+++ b/accel/tcg/internal.h
12
--- /dev/null
16
@@ -XXX,XX +XXX,XX @@ void tb_reset_jump(TranslationBlock *tb, int n);
13
+++ b/host/include/generic/host/store-insert-al16.h
17
TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
14
@@ -XXX,XX +XXX,XX @@
18
tb_page_addr_t phys_page2);
15
+/*
19
bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc);
16
+ * SPDX-License-Identifier: GPL-2.0-or-later
20
-int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
17
+ * Atomic store insert into 128-bit, generic version.
21
- uintptr_t searched_pc, bool reset_icount);
18
+ *
22
+void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
19
+ * Copyright (C) 2023 Linaro, Ltd.
23
+ uintptr_t host_pc, bool reset_icount);
20
+ */
24
21
+
25
/* Return the current PC from CPU, which may be cached in TB. */
22
+#ifndef HOST_STORE_INSERT_AL16_H
26
static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
23
+#define HOST_STORE_INSERT_AL16_H
27
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
24
+
28
index XXXXXXX..XXXXXXX 100644
29
--- a/include/exec/exec-all.h
30
+++ b/include/exec/exec-all.h
31
@@ -XXX,XX +XXX,XX @@ typedef ram_addr_t tb_page_addr_t;
32
#define TB_PAGE_ADDR_FMT RAM_ADDR_FMT
33
#endif
34
35
+/**
25
+/**
36
+ * cpu_unwind_state_data:
26
+ * store_atom_insert_al16:
37
+ * @cpu: the cpu context
27
+ * @p: host address
38
+ * @host_pc: the host pc within the translation
28
+ * @val: shifted value to store
39
+ * @data: output data
29
+ * @msk: mask for value to store
40
+ *
30
+ *
41
+ * Attempt to load the the unwind state for a host pc occurring in
31
+ * Atomically store @val to @p masked by @msk.
42
+ * translated code. If @host_pc is not in translated code, the
43
+ * function returns false; otherwise @data is loaded.
44
+ * This is the same unwind info as given to restore_state_to_opc.
45
+ */
32
+ */
46
+bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data);
33
+static inline void ATTRIBUTE_ATOMIC128_OPT
34
+store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
35
+{
36
+#if defined(CONFIG_ATOMIC128)
37
+ __uint128_t *pu;
38
+ Int128Alias old, new;
47
+
39
+
48
/**
40
+ /* With CONFIG_ATOMIC128, we can avoid the memory barriers. */
49
* cpu_restore_state:
41
+ pu = __builtin_assume_aligned(ps, 16);
50
- * @cpu: the vCPU state is to be restore to
42
+ old.u = *pu;
51
- * @searched_pc: the host PC the fault occurred at
43
+ msk = int128_not(msk);
52
+ * @cpu: the cpu context
44
+ do {
53
+ * @host_pc: the host pc within the translation
45
+ new.s = int128_and(old.s, msk);
54
* @will_exit: true if the TB executed will be interrupted after some
46
+ new.s = int128_or(new.s, val);
55
cpu adjustments. Required for maintaining the correct
47
+ } while (!__atomic_compare_exchange_n(pu, &old.u, new.u, true,
56
icount valus
48
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED));
57
* @return: true if state was restored, false otherwise
49
+#else
58
*
50
+ Int128 old, new, cmp;
59
* Attempt to restore the state for a fault occurring in translated
51
+
60
- * code. If the searched_pc is not in translated code no state is
52
+ ps = __builtin_assume_aligned(ps, 16);
61
+ * code. If @host_pc is not in translated code no state is
53
+ old = *ps;
62
* restored and the function returns false.
54
+ msk = int128_not(msk);
63
*/
55
+ do {
64
-bool cpu_restore_state(CPUState *cpu, uintptr_t searched_pc, bool will_exit);
56
+ cmp = old;
65
+bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit);
57
+ new = int128_and(old, msk);
66
58
+ new = int128_or(new, val);
67
G_NORETURN void cpu_loop_exit_noexc(CPUState *cpu);
59
+ old = atomic16_cmpxchg(ps, cmp, new);
68
G_NORETURN void cpu_loop_exit(CPUState *cpu);
60
+ } while (int128_ne(cmp, old));
69
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
61
+#endif
70
index XXXXXXX..XXXXXXX 100644
71
--- a/accel/tcg/translate-all.c
72
+++ b/accel/tcg/translate-all.c
73
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
74
return p - block;
75
}
76
77
-/* The cpu state corresponding to 'searched_pc' is restored.
78
- * When reset_icount is true, current TB will be interrupted and
79
- * icount should be recalculated.
80
- */
81
-int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
82
- uintptr_t searched_pc, bool reset_icount)
83
+static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
84
+ uint64_t *data)
85
{
86
- uint64_t data[TARGET_INSN_START_WORDS];
87
- uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
88
+ uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
89
const uint8_t *p = tb->tc.ptr + tb->tc.size;
90
int i, j, num_insns = tb->icount;
91
-#ifdef CONFIG_PROFILER
92
- TCGProfile *prof = &tcg_ctx->prof;
93
- int64_t ti = profile_getclock();
94
-#endif
95
96
- searched_pc -= GETPC_ADJ;
97
+ host_pc -= GETPC_ADJ;
98
99
- if (searched_pc < host_pc) {
100
+ if (host_pc < iter_pc) {
101
return -1;
102
}
103
104
- memset(data, 0, sizeof(data));
105
+ memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
106
if (!TARGET_TB_PCREL) {
107
data[0] = tb_pc(tb);
108
}
109
110
- /* Reconstruct the stored insn data while looking for the point at
111
- which the end of the insn exceeds the searched_pc. */
112
+ /*
113
+ * Reconstruct the stored insn data while looking for the point
114
+ * at which the end of the insn exceeds host_pc.
115
+ */
116
for (i = 0; i < num_insns; ++i) {
117
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
118
data[j] += decode_sleb128(&p);
119
}
120
- host_pc += decode_sleb128(&p);
121
- if (host_pc > searched_pc) {
122
- goto found;
123
+ iter_pc += decode_sleb128(&p);
124
+ if (iter_pc > host_pc) {
125
+ return num_insns - i;
126
}
127
}
128
return -1;
129
+}
62
+}
130
+
63
+
131
+/*
64
+#endif /* HOST_STORE_INSERT_AL16_H */
132
+ * The cpu state corresponding to 'host_pc' is restored.
65
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
133
+ * When reset_icount is true, current TB will be interrupted and
66
index XXXXXXX..XXXXXXX 100644
134
+ * icount should be recalculated.
67
--- a/accel/tcg/ldst_atomicity.c.inc
135
+ */
68
+++ b/accel/tcg/ldst_atomicity.c.inc
136
+void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
69
@@ -XXX,XX +XXX,XX @@
137
+ uintptr_t host_pc, bool reset_icount)
70
*/
138
+{
71
139
+ uint64_t data[TARGET_INSN_START_WORDS];
72
#include "host/load-extract-al16-al8.h"
140
+#ifdef CONFIG_PROFILER
73
+#include "host/store-insert-al16.h"
141
+ TCGProfile *prof = &tcg_ctx->prof;
74
142
+ int64_t ti = profile_getclock();
75
#ifdef CONFIG_ATOMIC64
143
+#endif
76
# define HAVE_al8 true
144
+ int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
77
@@ -XXX,XX +XXX,XX @@ static void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk)
145
+
78
__ATOMIC_RELAXED, __ATOMIC_RELAXED));
146
+ if (insns_left < 0) {
147
+ return;
148
+ }
149
150
- found:
151
if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) {
152
assert(icount_enabled());
153
- /* Reset the cycle counter to the start of the block
154
- and shift if to the number of actually executed instructions */
155
- cpu_neg(cpu)->icount_decr.u16.low += num_insns - i;
156
+ /*
157
+ * Reset the cycle counter to the start of the block and
158
+ * shift if to the number of actually executed instructions.
159
+ */
160
+ cpu_neg(cpu)->icount_decr.u16.low += insns_left;
161
}
162
163
cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
164
@@ -XXX,XX +XXX,XX @@ int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
165
prof->restore_time + profile_getclock() - ti);
166
qatomic_set(&prof->restore_count, prof->restore_count + 1);
167
#endif
168
- return 0;
169
}
79
}
170
80
171
bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
81
-/**
172
@@ -XXX,XX +XXX,XX @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit)
82
- * store_atom_insert_al16:
173
return false;
83
- * @p: host address
174
}
84
- * @val: shifted value to store
175
85
- * @msk: mask for value to store
176
+bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
86
- *
177
+{
87
- * Atomically store @val to @p masked by @msk.
178
+ if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
88
- */
179
+ TranslationBlock *tb = tcg_tb_lookup(host_pc);
89
-static void ATTRIBUTE_ATOMIC128_OPT
180
+ if (tb) {
90
-store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk)
181
+ return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
91
-{
182
+ }
92
-#if defined(CONFIG_ATOMIC128)
183
+ }
93
- __uint128_t *pu, old, new;
184
+ return false;
94
-
185
+}
95
- /* With CONFIG_ATOMIC128, we can avoid the memory barriers. */
186
+
96
- pu = __builtin_assume_aligned(ps, 16);
187
void page_init(void)
97
- old = *pu;
188
{
98
- do {
189
page_size_init();
99
- new = (old & ~msk.u) | val.u;
100
- } while (!__atomic_compare_exchange_n(pu, &old, new, true,
101
- __ATOMIC_RELAXED, __ATOMIC_RELAXED));
102
-#elif defined(CONFIG_CMPXCHG128)
103
- __uint128_t *pu, old, new;
104
-
105
- /*
106
- * Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always
107
- * defer to libatomic, so we must use __sync_*_compare_and_swap_16
108
- * and accept the sequential consistency that comes with it.
109
- */
110
- pu = __builtin_assume_aligned(ps, 16);
111
- do {
112
- old = *pu;
113
- new = (old & ~msk.u) | val.u;
114
- } while (!__sync_bool_compare_and_swap_16(pu, old, new));
115
-#else
116
- qemu_build_not_reached();
117
-#endif
118
-}
119
-
120
/**
121
* store_bytes_leN:
122
* @pv: host address
190
--
123
--
191
2.34.1
124
2.34.1
diff view generated by jsdifflib
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
.../x86_64/host/load-extract-al16-al8.h | 50 +++++++++++++++++++
5
1 file changed, 50 insertions(+)
6
create mode 100644 host/include/x86_64/host/load-extract-al16-al8.h
1
7
8
diff --git a/host/include/x86_64/host/load-extract-al16-al8.h b/host/include/x86_64/host/load-extract-al16-al8.h
9
new file mode 100644
10
index XXXXXXX..XXXXXXX
11
--- /dev/null
12
+++ b/host/include/x86_64/host/load-extract-al16-al8.h
13
@@ -XXX,XX +XXX,XX @@
14
+/*
15
+ * SPDX-License-Identifier: GPL-2.0-or-later
16
+ * Atomic extract 64 from 128-bit, x86_64 version.
17
+ *
18
+ * Copyright (C) 2023 Linaro, Ltd.
19
+ */
20
+
21
+#ifndef X86_64_LOAD_EXTRACT_AL16_AL8_H
22
+#define X86_64_LOAD_EXTRACT_AL16_AL8_H
23
+
24
+#ifdef CONFIG_INT128_TYPE
25
+#include "host/cpuinfo.h"
26
+
27
+/**
28
+ * load_atom_extract_al16_or_al8:
29
+ * @pv: host address
30
+ * @s: object size in bytes, @s <= 8.
31
+ *
32
+ * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
33
+ * cross a 16-byte boundary then the access must be 16-byte atomic,
34
+ * otherwise the access must be 8-byte atomic.
35
+ */
36
+static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
37
+load_atom_extract_al16_or_al8(void *pv, int s)
38
+{
39
+ uintptr_t pi = (uintptr_t)pv;
40
+ __int128_t *ptr_align = (__int128_t *)(pi & ~7);
41
+ int shr = (pi & 7) * 8;
42
+ Int128Alias r;
43
+
44
+ /*
45
+ * ptr_align % 16 is now only 0 or 8.
46
+ * If the host supports atomic loads with VMOVDQU, then always use that,
47
+ * making the branch highly predictable. Otherwise we must use VMOVDQA
48
+ * when ptr_align % 16 == 0 for 16-byte atomicity.
49
+ */
50
+ if ((cpuinfo & CPUINFO_ATOMIC_VMOVDQU) || (pi & 8)) {
51
+ asm("vmovdqu %1, %0" : "=x" (r.i) : "m" (*ptr_align));
52
+ } else {
53
+ asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr_align));
54
+ }
55
+ return int128_getlo(int128_urshift(r.s, shr));
56
+}
57
+#else
58
+/* Fallback definition that must be optimized away, or error. */
59
+uint64_t QEMU_ERROR("unsupported atomic")
60
+ load_atom_extract_al16_or_al8(void *pv, int s);
61
+#endif
62
+
63
+#endif /* X86_64_LOAD_EXTRACT_AL16_AL8_H */
64
--
65
2.34.1
diff view generated by jsdifflib
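
For context on how the x86_64 specialization above is consumed: callers in accel/tcg/ldst_atomicity.c.inc pass a misaligned host pointer plus the access size, and the helper picks VMOVDQU or VMOVDQA from the cpuinfo probe. A hypothetical call site, purely for illustration (the wrapper name is made up):

    /*
     * Hypothetical call site.  Here 'pv' is misaligned for a 4-byte access
     * but [pv, pv+3] happens to sit inside one 16-byte line, so the helper's
     * 16-byte-atomic case applies.
     */
    static inline uint32_t read_u32_within_line(void *pv)
    {
        return (uint32_t)load_atom_extract_al16_or_al8(pv, 4);
    }
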
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
.../aarch64/host/load-extract-al16-al8.h | 40 +++++++++++++++++++
5
1 file changed, 40 insertions(+)
6
create mode 100644 host/include/aarch64/host/load-extract-al16-al8.h
1
7
8
diff --git a/host/include/aarch64/host/load-extract-al16-al8.h b/host/include/aarch64/host/load-extract-al16-al8.h
9
new file mode 100644
10
index XXXXXXX..XXXXXXX
11
--- /dev/null
12
+++ b/host/include/aarch64/host/load-extract-al16-al8.h
13
@@ -XXX,XX +XXX,XX @@
14
+/*
15
+ * SPDX-License-Identifier: GPL-2.0-or-later
16
+ * Atomic extract 64 from 128-bit, AArch64 version.
17
+ *
18
+ * Copyright (C) 2023 Linaro, Ltd.
19
+ */
20
+
21
+#ifndef AARCH64_LOAD_EXTRACT_AL16_AL8_H
22
+#define AARCH64_LOAD_EXTRACT_AL16_AL8_H
23
+
24
+#include "host/cpuinfo.h"
25
+#include "tcg/debug-assert.h"
26
+
27
+/**
28
+ * load_atom_extract_al16_or_al8:
29
+ * @pv: host address
30
+ * @s: object size in bytes, @s <= 8.
31
+ *
32
+ * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
33
+ * cross a 16-byte boundary then the access must be 16-byte atomic,
34
+ * otherwise the access must be 8-byte atomic.
35
+ */
36
+static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
37
+{
38
+ uintptr_t pi = (uintptr_t)pv;
39
+ __int128_t *ptr_align = (__int128_t *)(pi & ~7);
40
+ int shr = (pi & 7) * 8;
41
+ uint64_t l, h;
42
+
43
+ /*
44
+ * With FEAT_LSE2, LDP is single-copy atomic if 16-byte aligned
45
+ * and single-copy atomic on the parts if 8-byte aligned.
46
+ * All we need do is align the pointer mod 8.
47
+ */
48
+ tcg_debug_assert(HAVE_ATOMIC128_RO);
49
+ asm("ldp %0, %1, %2" : "=r"(l), "=r"(h) : "m"(*ptr_align));
50
+ return (l >> shr) | (h << (-shr & 63));
51
+}
52
+
53
+#endif /* AARCH64_LOAD_EXTRACT_AL16_AL8_H */
54
--
55
2.34.1
diff view generated by jsdifflib
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
host/include/aarch64/host/store-insert-al16.h | 47 +++++++++++++++++++
5
1 file changed, 47 insertions(+)
6
create mode 100644 host/include/aarch64/host/store-insert-al16.h
1
7
8
diff --git a/host/include/aarch64/host/store-insert-al16.h b/host/include/aarch64/host/store-insert-al16.h
9
new file mode 100644
10
index XXXXXXX..XXXXXXX
11
--- /dev/null
12
+++ b/host/include/aarch64/host/store-insert-al16.h
13
@@ -XXX,XX +XXX,XX @@
14
+/*
15
+ * SPDX-License-Identifier: GPL-2.0-or-later
16
+ * Atomic store insert into 128-bit, AArch64 version.
17
+ *
18
+ * Copyright (C) 2023 Linaro, Ltd.
19
+ */
20
+
21
+#ifndef AARCH64_STORE_INSERT_AL16_H
22
+#define AARCH64_STORE_INSERT_AL16_H
23
+
24
+/**
25
+ * store_atom_insert_al16:
26
+ * @p: host address
27
+ * @val: shifted value to store
28
+ * @msk: mask for value to store
29
+ *
30
+ * Atomically store @val to @p masked by @msk.
31
+ */
32
+static inline void ATTRIBUTE_ATOMIC128_OPT
33
+store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
34
+{
35
+ /*
36
+ * GCC only implements __sync* primitives for int128 on aarch64.
37
+ * We can do better without the barriers, and integrating the
38
+ * arithmetic into the load-exclusive/store-conditional pair.
39
+ */
40
+ uint64_t tl, th, vl, vh, ml, mh;
41
+ uint32_t fail;
42
+
43
+ qemu_build_assert(!HOST_BIG_ENDIAN);
44
+ vl = int128_getlo(val);
45
+ vh = int128_gethi(val);
46
+ ml = int128_getlo(msk);
47
+ mh = int128_gethi(msk);
48
+
49
+ asm("0: ldxp %[l], %[h], %[mem]\n\t"
50
+ "bic %[l], %[l], %[ml]\n\t"
51
+ "bic %[h], %[h], %[mh]\n\t"
52
+ "orr %[l], %[l], %[vl]\n\t"
53
+ "orr %[h], %[h], %[vh]\n\t"
54
+ "stxp %w[f], %[l], %[h], %[mem]\n\t"
55
+ "cbnz %w[f], 0b\n"
56
+ : [mem] "+Q"(*ps), [f] "=&r"(fail), [l] "=&r"(tl), [h] "=&r"(th)
57
+ : [vl] "r"(vl), [vh] "r"(vh), [ml] "r"(ml), [mh] "r"(mh));
58
+}
59
+
60
+#endif /* AARCH64_STORE_INSERT_AL16_H */
61
--
62
2.34.1
diff view generated by jsdifflib
1
We have called cpu_restore_state asserting will_exit.
1
The last use was removed by e77c89fb086a.
2
Do not go back on that promise. This affects icount.
3
2
3
Fixes: e77c89fb086a ("cputlb: Remove static tlb sizing")
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
6
---
7
target/openrisc/sys_helper.c | 2 +-
7
tcg/aarch64/tcg-target.h | 1 -
8
1 file changed, 1 insertion(+), 1 deletion(-)
8
tcg/arm/tcg-target.h | 1 -
9
tcg/i386/tcg-target.h | 1 -
10
tcg/mips/tcg-target.h | 1 -
11
tcg/ppc/tcg-target.h | 1 -
12
tcg/riscv/tcg-target.h | 1 -
13
tcg/s390x/tcg-target.h | 1 -
14
tcg/sparc64/tcg-target.h | 1 -
15
tcg/tci/tcg-target.h | 1 -
16
9 files changed, 9 deletions(-)
9
17
10
diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c
18
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
11
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
12
--- a/target/openrisc/sys_helper.c
20
--- a/tcg/aarch64/tcg-target.h
13
+++ b/target/openrisc/sys_helper.c
21
+++ b/tcg/aarch64/tcg-target.h
14
@@ -XXX,XX +XXX,XX @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb)
22
@@ -XXX,XX +XXX,XX @@
15
if (env->pc != rb) {
23
#include "host/cpuinfo.h"
16
env->pc = rb;
24
17
env->dflag = 0;
25
#define TCG_TARGET_INSN_UNIT_SIZE 4
18
- cpu_loop_exit(cs);
26
-#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
19
}
27
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
20
+ cpu_loop_exit(cs);
28
21
break;
29
typedef enum {
22
30
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
23
case TO_SPR(0, 17): /* SR */
31
index XXXXXXX..XXXXXXX 100644
32
--- a/tcg/arm/tcg-target.h
33
+++ b/tcg/arm/tcg-target.h
34
@@ -XXX,XX +XXX,XX @@ extern int arm_arch;
35
#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7)
36
37
#define TCG_TARGET_INSN_UNIT_SIZE 4
38
-#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
39
#define MAX_CODE_GEN_BUFFER_SIZE UINT32_MAX
40
41
typedef enum {
42
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
43
index XXXXXXX..XXXXXXX 100644
44
--- a/tcg/i386/tcg-target.h
45
+++ b/tcg/i386/tcg-target.h
46
@@ -XXX,XX +XXX,XX @@
47
#include "host/cpuinfo.h"
48
49
#define TCG_TARGET_INSN_UNIT_SIZE 1
50
-#define TCG_TARGET_TLB_DISPLACEMENT_BITS 31
51
52
#ifdef __x86_64__
53
# define TCG_TARGET_REG_BITS 64
54
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
55
index XXXXXXX..XXXXXXX 100644
56
--- a/tcg/mips/tcg-target.h
57
+++ b/tcg/mips/tcg-target.h
58
@@ -XXX,XX +XXX,XX @@
59
#endif
60
61
#define TCG_TARGET_INSN_UNIT_SIZE 4
62
-#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
63
#define TCG_TARGET_NB_REGS 32
64
65
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
66
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
67
index XXXXXXX..XXXXXXX 100644
68
--- a/tcg/ppc/tcg-target.h
69
+++ b/tcg/ppc/tcg-target.h
70
@@ -XXX,XX +XXX,XX @@
71
72
#define TCG_TARGET_NB_REGS 64
73
#define TCG_TARGET_INSN_UNIT_SIZE 4
74
-#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
75
76
typedef enum {
77
TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3,
78
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
79
index XXXXXXX..XXXXXXX 100644
80
--- a/tcg/riscv/tcg-target.h
81
+++ b/tcg/riscv/tcg-target.h
82
@@ -XXX,XX +XXX,XX @@
83
#define TCG_TARGET_REG_BITS 64
84
85
#define TCG_TARGET_INSN_UNIT_SIZE 4
86
-#define TCG_TARGET_TLB_DISPLACEMENT_BITS 20
87
#define TCG_TARGET_NB_REGS 32
88
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
89
90
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
91
index XXXXXXX..XXXXXXX 100644
92
--- a/tcg/s390x/tcg-target.h
93
+++ b/tcg/s390x/tcg-target.h
94
@@ -XXX,XX +XXX,XX @@
95
#define S390_TCG_TARGET_H
96
97
#define TCG_TARGET_INSN_UNIT_SIZE 2
98
-#define TCG_TARGET_TLB_DISPLACEMENT_BITS 19
99
100
/* We have a +- 4GB range on the branches; leave some slop. */
101
#define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB)
102
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
103
index XXXXXXX..XXXXXXX 100644
104
--- a/tcg/sparc64/tcg-target.h
105
+++ b/tcg/sparc64/tcg-target.h
106
@@ -XXX,XX +XXX,XX @@
107
#define SPARC_TCG_TARGET_H
108
109
#define TCG_TARGET_INSN_UNIT_SIZE 4
110
-#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
111
#define TCG_TARGET_NB_REGS 32
112
#define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
113
114
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
115
index XXXXXXX..XXXXXXX 100644
116
--- a/tcg/tci/tcg-target.h
117
+++ b/tcg/tci/tcg-target.h
118
@@ -XXX,XX +XXX,XX @@
119
120
#define TCG_TARGET_INTERPRETER 1
121
#define TCG_TARGET_INSN_UNIT_SIZE 4
122
-#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
123
#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
124
125
#if UINTPTR_MAX == UINT32_MAX
24
--
126
--
25
2.34.1
127
2.34.1
26
128
27
129
diff view generated by jsdifflib
New patch
1
Invert the exit code, for use with the testsuite.
1
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
scripts/decodetree.py | 9 +++++++--
6
1 file changed, 7 insertions(+), 2 deletions(-)
7
8
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
9
index XXXXXXX..XXXXXXX 100644
10
--- a/scripts/decodetree.py
11
+++ b/scripts/decodetree.py
12
@@ -XXX,XX +XXX,XX @@
13
formats = {}
14
allpatterns = []
15
anyextern = False
16
+testforerror = False
17
18
translate_prefix = 'trans'
19
translate_scope = 'static '
20
@@ -XXX,XX +XXX,XX @@ def error_with_file(file, lineno, *args):
21
if output_file and output_fd:
22
output_fd.close()
23
os.remove(output_file)
24
- exit(1)
25
+ exit(0 if testforerror else 1)
26
# end error_with_file
27
28
29
@@ -XXX,XX +XXX,XX @@ def main():
30
global bitop_width
31
global variablewidth
32
global anyextern
33
+ global testforerror
34
35
decode_scope = 'static '
36
37
long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
38
- 'static-decode=', 'varinsnwidth=']
39
+ 'static-decode=', 'varinsnwidth=', 'test-for-error']
40
try:
41
(opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
42
except getopt.GetoptError as err:
43
@@ -XXX,XX +XXX,XX @@ def main():
44
bitop_width = 64
45
elif insnwidth != 32:
46
error(0, 'cannot handle insns of width', insnwidth)
47
+ elif o == '--test-for-error':
48
+ testforerror = True
49
else:
50
assert False, 'unhandled option'
51
52
@@ -XXX,XX +XXX,XX @@ def main():
53
54
if output_file:
55
output_fd.close()
56
+ exit(1 if testforerror else 0)
57
# end main
58
59
60
--
61
2.34.1
diff view generated by jsdifflib
New patch
1
Two copy-paste errors walking the parse tree.
1
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
scripts/decodetree.py | 4 ++--
6
1 file changed, 2 insertions(+), 2 deletions(-)
7
8
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
9
index XXXXXXX..XXXXXXX 100644
10
--- a/scripts/decodetree.py
11
+++ b/scripts/decodetree.py
12
@@ -XXX,XX +XXX,XX @@ def build_tree(self):
13
14
def prop_format(self):
15
for p in self.pats:
16
- p.build_tree()
17
+ p.prop_format()
18
19
def prop_width(self):
20
width = None
21
@@ -XXX,XX +XXX,XX @@ def __build_tree(pats, outerbits, outermask):
22
return t
23
24
def build_tree(self):
25
- super().prop_format()
26
+ super().build_tree()
27
self.tree = self.__build_tree(self.pats, self.fixedbits,
28
self.fixedmask)
29
30
--
31
2.34.1
diff view generated by jsdifflib
New patch
1
Test err_pattern_group_empty.decode failed with exception:
1
2
3
Traceback (most recent call last):
4
File "./scripts/decodetree.py", line 1424, in <module> main()
5
File "./scripts/decodetree.py", line 1342, in main toppat.build_tree()
6
File "./scripts/decodetree.py", line 627, in build_tree
7
self.tree = self.__build_tree(self.pats, self.fixedbits,
8
File "./scripts/decodetree.py", line 607, in __build_tree
9
fb = i.fixedbits & innermask
10
TypeError: unsupported operand type(s) for &: 'NoneType' and 'int'
11
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
14
scripts/decodetree.py | 6 ++++++
15
1 file changed, 6 insertions(+)
16
17
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
18
index XXXXXXX..XXXXXXX 100644
19
--- a/scripts/decodetree.py
20
+++ b/scripts/decodetree.py
21
@@ -XXX,XX +XXX,XX @@ def output_code(self, i, extracted, outerbits, outermask):
22
output(ind, '}\n')
23
else:
24
p.output_code(i, extracted, p.fixedbits, p.fixedmask)
25
+
26
+ def build_tree(self):
27
+ if not self.pats:
28
+ error_with_file(self.file, self.lineno, 'empty pattern group')
29
+ super().build_tree()
30
+
31
#end IncMultiPattern
32
33
34
--
35
2.34.1
diff view generated by jsdifflib
1
Since we do not plan to exit, use cpu_unwind_state_data
1
Nor report any PermissionError on remove.
2
and extract exactly the data requested.
2
The primary purpose is testing with -o /dev/null.
3
4
This is a bug fix, in that we no longer clobber dflag.
5
6
Consider:
7
8
l.j L2 // branch
9
l.mfspr r1, ppc // delay
10
11
L1: boom
12
L2: l.lwa r3, (r4)
13
14
Here, dflag would be set by cpu_restore_state (because that is the current
15
state of the cpu), but but not cleared by tb_stop on exiting the TB
16
(because DisasContext has recorded the current value as zero).
17
18
The next TB begins at L2 with dflag incorrectly set. If the load has a
19
tlb miss, then the exception will be delivered as per a delay slot:
20
with DSX set in the status register and PC decremented (delay slots
21
restart by re-executing the branch). This will cause the return from
22
interrupt to go to L1, and boom!
23
3
24
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
25
---
5
---
26
target/openrisc/sys_helper.c | 11 +++++++++--
6
scripts/decodetree.py | 7 ++++++-
27
1 file changed, 9 insertions(+), 2 deletions(-)
7
1 file changed, 6 insertions(+), 1 deletion(-)
28
8
29
diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c
9
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
30
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
31
--- a/target/openrisc/sys_helper.c
11
--- a/scripts/decodetree.py
32
+++ b/target/openrisc/sys_helper.c
12
+++ b/scripts/decodetree.py
33
@@ -XXX,XX +XXX,XX @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env, target_ulong rd,
13
@@ -XXX,XX +XXX,XX @@ def error_with_file(file, lineno, *args):
34
target_ulong spr)
14
35
{
15
if output_file and output_fd:
36
#ifndef CONFIG_USER_ONLY
16
output_fd.close()
37
+ uint64_t data[TARGET_INSN_START_WORDS];
17
- os.remove(output_file)
38
MachineState *ms = MACHINE(qdev_get_machine());
18
+ # Do not try to remove e.g. -o /dev/null
39
OpenRISCCPU *cpu = env_archcpu(env);
19
+ if not output_file.startswith("/dev"):
40
CPUState *cs = env_cpu(env);
20
+ try:
41
@@ -XXX,XX +XXX,XX @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env, target_ulong rd,
21
+ os.remove(output_file)
42
return env->evbar;
22
+ except PermissionError:
43
23
+ pass
44
case TO_SPR(0, 16): /* NPC (equals PC) */
24
exit(0 if testforerror else 1)
45
- cpu_restore_state(cs, GETPC(), false);
25
# end error_with_file
46
+ if (cpu_unwind_state_data(cs, GETPC(), data)) {
26
47
+ return data[0];
48
+ }
49
return env->pc;
50
51
case TO_SPR(0, 17): /* SR */
52
return cpu_get_sr(env);
53
54
case TO_SPR(0, 18): /* PPC */
55
- cpu_restore_state(cs, GETPC(), false);
56
+ if (cpu_unwind_state_data(cs, GETPC(), data)) {
57
+ if (data[1] & 2) {
58
+ return data[0] - 4;
59
+ }
60
+ }
61
return env->ppc;
62
63
case TO_SPR(0, 32): /* EPCR */
64
--
27
--
65
2.34.1
28
2.34.1
diff view generated by jsdifflib
New patch
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
---
3
tests/decode/check.sh | 24 ----------------
4
tests/decode/meson.build | 59 ++++++++++++++++++++++++++++++++++++++++
5
tests/meson.build | 5 +---
6
3 files changed, 60 insertions(+), 28 deletions(-)
7
delete mode 100755 tests/decode/check.sh
8
create mode 100644 tests/decode/meson.build
1
9
10
diff --git a/tests/decode/check.sh b/tests/decode/check.sh
11
deleted file mode 100755
12
index XXXXXXX..XXXXXXX
13
--- a/tests/decode/check.sh
14
+++ /dev/null
15
@@ -XXX,XX +XXX,XX @@
16
-#!/bin/sh
17
-# This work is licensed under the terms of the GNU LGPL, version 2 or later.
18
-# See the COPYING.LIB file in the top-level directory.
19
-
20
-PYTHON=$1
21
-DECODETREE=$2
22
-E=0
23
-
24
-# All of these tests should produce errors
25
-for i in err_*.decode; do
26
- if $PYTHON $DECODETREE $i > /dev/null 2> /dev/null; then
27
- # Pass, aka failed to fail.
28
- echo FAIL: $i 1>&2
29
- E=1
30
- fi
31
-done
32
-
33
-for i in succ_*.decode; do
34
- if ! $PYTHON $DECODETREE $i > /dev/null 2> /dev/null; then
35
- echo FAIL:$i 1>&2
36
- fi
37
-done
38
-
39
-exit $E
40
diff --git a/tests/decode/meson.build b/tests/decode/meson.build
41
new file mode 100644
42
index XXXXXXX..XXXXXXX
43
--- /dev/null
44
+++ b/tests/decode/meson.build
45
@@ -XXX,XX +XXX,XX @@
46
+err_tests = [
47
+ 'err_argset1.decode',
48
+ 'err_argset2.decode',
49
+ 'err_field1.decode',
50
+ 'err_field2.decode',
51
+ 'err_field3.decode',
52
+ 'err_field4.decode',
53
+ 'err_field5.decode',
54
+ 'err_field6.decode',
55
+ 'err_init1.decode',
56
+ 'err_init2.decode',
57
+ 'err_init3.decode',
58
+ 'err_init4.decode',
59
+ 'err_overlap1.decode',
60
+ 'err_overlap2.decode',
61
+ 'err_overlap3.decode',
62
+ 'err_overlap4.decode',
63
+ 'err_overlap5.decode',
64
+ 'err_overlap6.decode',
65
+ 'err_overlap7.decode',
66
+ 'err_overlap8.decode',
67
+ 'err_overlap9.decode',
68
+ 'err_pattern_group_empty.decode',
69
+ 'err_pattern_group_ident1.decode',
70
+ 'err_pattern_group_ident2.decode',
71
+ 'err_pattern_group_nest1.decode',
72
+ 'err_pattern_group_nest2.decode',
73
+ 'err_pattern_group_nest3.decode',
74
+ 'err_pattern_group_overlap1.decode',
75
+ 'err_width1.decode',
76
+ 'err_width2.decode',
77
+ 'err_width3.decode',
78
+ 'err_width4.decode',
79
+]
80
+
81
+succ_tests = [
82
+ 'succ_argset_type1.decode',
83
+ 'succ_function.decode',
84
+ 'succ_ident1.decode',
85
+ 'succ_pattern_group_nest1.decode',
86
+ 'succ_pattern_group_nest2.decode',
87
+ 'succ_pattern_group_nest3.decode',
88
+ 'succ_pattern_group_nest4.decode',
89
+]
90
+
91
+suite = 'decodetree'
92
+decodetree = find_program(meson.project_source_root() / 'scripts/decodetree.py')
93
+
94
+foreach t: err_tests
95
+ test(fs.replace_suffix(t, ''),
96
+ decodetree, args: ['-o', '/dev/null', '--test-for-error', files(t)],
97
+ suite: suite)
98
+endforeach
99
+
100
+foreach t: succ_tests
101
+ test(fs.replace_suffix(t, ''),
102
+ decodetree, args: ['-o', '/dev/null', files(t)],
103
+ suite: suite)
104
+endforeach
105
diff --git a/tests/meson.build b/tests/meson.build
106
index XXXXXXX..XXXXXXX 100644
107
--- a/tests/meson.build
108
+++ b/tests/meson.build
109
@@ -XXX,XX +XXX,XX @@ if have_tools and have_vhost_user and 'CONFIG_LINUX' in config_host
110
dependencies: [qemuutil, vhost_user])
111
endif
112
113
-test('decodetree', sh,
114
- args: [ files('decode/check.sh'), config_host['PYTHON'], files('../scripts/decodetree.py') ],
115
- workdir: meson.current_source_dir() / 'decode',
116
- suite: 'decodetree')
117
+subdir('decode')
118
119
if 'CONFIG_TCG' in config_all
120
subdir('fp')
121
--
122
2.34.1
diff view generated by jsdifflib
New patch
1
From: Peter Maydell <peter.maydell@linaro.org>
1
2
3
Document the named field syntax that we want to implement for the
4
decodetree script. This allows a field to be defined in terms of
5
some other field that the instruction pattern has already set, for
6
example:
7
8
%sz_imm 10:3 sz:3 !function=expand_sz_imm
9
10
to allow a function to be passed both an immediate field from the
11
instruction and also a sz value which might have been specified by
12
the instruction pattern directly (sz=1, etc) rather than being a
13
simple field within the instruction.
14
15
Note that the restriction on not having the format referring to the
16
pattern and the pattern referring to the format simultaneously is a
17
restriction of the decoder generator rather than inherently being a
18
silly thing to do.
19
20
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
21
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
22
Message-Id: <20230523120447.728365-3-peter.maydell@linaro.org>
23
---
24
docs/devel/decodetree.rst | 33 ++++++++++++++++++++++++++++-----
25
1 file changed, 28 insertions(+), 5 deletions(-)
26
27
diff --git a/docs/devel/decodetree.rst b/docs/devel/decodetree.rst
28
index XXXXXXX..XXXXXXX 100644
29
--- a/docs/devel/decodetree.rst
30
+++ b/docs/devel/decodetree.rst
31
@@ -XXX,XX +XXX,XX @@ Fields
32
33
Syntax::
34
35
- field_def := '%' identifier ( unnamed_field )* ( !function=identifier )?
36
+ field_def := '%' identifier ( field )* ( !function=identifier )?
37
+ field := unnamed_field | named_field
38
unnamed_field := number ':' ( 's' ) number
39
+ named_field := identifier ':' ( 's' ) number
40
41
For *unnamed_field*, the first number is the least-significant bit position
42
of the field and the second number is the length of the field. If the 's' is
43
-present, the field is considered signed. If multiple ``unnamed_fields`` are
44
-present, they are concatenated. In this way one can define disjoint fields.
45
+present, the field is considered signed.
46
+
47
+A *named_field* refers to some other field in the instruction pattern
48
+or format. Regardless of the length of the other field where it is
49
+defined, it will be inserted into this field with the specified
50
+signedness and bit width.
51
+
52
+Field definitions that involve loops (i.e. where a field is defined
53
+directly or indirectly in terms of itself) are errors.
54
+
55
+A format can include fields that refer to named fields that are
56
+defined in the instruction pattern(s) that use the format.
57
+Conversely, an instruction pattern can include fields that refer to
58
+named fields that are defined in the format it uses. However you
59
+cannot currently do both at once (i.e. pattern P uses format F; F has
60
+a field A that refers to a named field B that is defined in P, and P
61
+has a field C that refers to a named field D that is defined in F).
62
+
63
+If multiple ``fields`` are present, they are concatenated.
64
+In this way one can define disjoint fields.
65
66
If ``!function`` is specified, the concatenated result is passed through the
67
named function, taking and returning an integral value.
68
69
-One may use ``!function`` with zero ``unnamed_fields``. This case is called
70
+One may use ``!function`` with zero ``fields``. This case is called
71
a *parameter*, and the named function is only passed the ``DisasContext``
72
and returns an integral value extracted from there.
73
74
-A field with no ``unnamed_fields`` and no ``!function`` is in error.
75
+A field with no ``fields`` and no ``!function`` is in error.
76
77
Field examples:
78
79
@@ -XXX,XX +XXX,XX @@ Field examples:
80
| %shimm8 5:s8 13:1 | expand_shimm8(sextract(i, 5, 8) << 1 | |
81
| !function=expand_shimm8 | extract(i, 13, 1)) |
82
+---------------------------+---------------------------------------------+
83
+| %sz_imm 10:2 sz:3 | expand_sz_imm(extract(i, 10, 2) << 3 | |
84
+| !function=expand_sz_imm | extract(a->sz, 0, 3)) |
85
++---------------------------+---------------------------------------------+
86
87
Argument Sets
88
=============
89
--
90
2.34.1
diff view generated by jsdifflib
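
To make the %sz_imm example in the table above concrete, its semantics column corresponds to C along these lines. This is a hand-written sketch with stand-in types, a local bit-extract helper and a made-up expand_sz_imm(); it is not the script's actual output.

    #include <stdint.h>
    #include <stdio.h>

    typedef struct { int sz; int sz_imm; } arg_demo;

    /* Stand-in for the user-supplied !function; the real one is target code. */
    static int expand_sz_imm(void *ctx, int x) { (void)ctx; return x << 1; }

    static uint32_t extract_bits(uint32_t value, int start, int length)
    {
        return (value >> start) & ((1u << length) - 1);
    }

    int main(void)
    {
        uint32_t insn = 0x0c00;     /* insn bits [11:10] = 0b11 */
        arg_demo a = { .sz = 5 };   /* as if the pattern had set sz=5 */

        /* %sz_imm 10:2 sz:3 !function=expand_sz_imm */
        a.sz_imm = expand_sz_imm(NULL, (int)((extract_bits(insn, 10, 2) << 3)
                                             | extract_bits((uint32_t)a.sz, 0, 3)));
        printf("sz_imm = %d\n", a.sz_imm);   /* (3 << 3 | 5) << 1 = 58 */
        return 0;
    }
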
New patch
1
From: Peter Maydell <peter.maydell@linaro.org>
1
2
3
To support referring to other named fields in field definitions, we
4
need to pass the str_extract() method a function which tells it how
5
to emit the code for a previously initialized named field. (In
6
Pattern::output_code() the other field will be "u.f_foo.field", and
7
in Format::output_extract() it is "a->field".)
8
9
Refactor the two callsites that currently do "output code to
10
initialize each field", and have them pass a lambda that defines how
11
to format the lvalue in each case. This is then used both in
12
emitting the LHS of the assignment and also passed down to
13
str_extract() as a new argument (unused at the moment, but will be
14
used in the following patch).
15
16
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
17
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
18
Message-Id: <20230523120447.728365-4-peter.maydell@linaro.org>
19
---
20
scripts/decodetree.py | 26 +++++++++++++++-----------
21
1 file changed, 15 insertions(+), 11 deletions(-)
22
23
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
24
index XXXXXXX..XXXXXXX 100644
25
--- a/scripts/decodetree.py
26
+++ b/scripts/decodetree.py
27
@@ -XXX,XX +XXX,XX @@ def __str__(self):
28
s = ''
29
return str(self.pos) + ':' + s + str(self.len)
30
31
- def str_extract(self):
32
+ def str_extract(self, lvalue_formatter):
33
global bitop_width
34
s = 's' if self.sign else ''
35
return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'
36
@@ -XXX,XX +XXX,XX @@ def __init__(self, subs, mask):
37
def __str__(self):
38
return str(self.subs)
39
40
- def str_extract(self):
41
+ def str_extract(self, lvalue_formatter):
42
global bitop_width
43
ret = '0'
44
pos = 0
45
for f in reversed(self.subs):
46
- ext = f.str_extract()
47
+ ext = f.str_extract(lvalue_formatter)
48
if pos == 0:
49
ret = ext
50
else:
51
@@ -XXX,XX +XXX,XX @@ def __init__(self, value):
52
def __str__(self):
53
return str(self.value)
54
55
- def str_extract(self):
56
+ def str_extract(self, lvalue_formatter):
57
return str(self.value)
58
59
def __cmp__(self, other):
60
@@ -XXX,XX +XXX,XX @@ def __init__(self, func, base):
61
def __str__(self):
62
return self.func + '(' + str(self.base) + ')'
63
64
- def str_extract(self):
65
- return self.func + '(ctx, ' + self.base.str_extract() + ')'
66
+ def str_extract(self, lvalue_formatter):
67
+ return (self.func + '(ctx, '
68
+ + self.base.str_extract(lvalue_formatter) + ')')
69
70
def __eq__(self, other):
71
return self.func == other.func and self.base == other.base
72
@@ -XXX,XX +XXX,XX @@ def __init__(self, func):
73
def __str__(self):
74
return self.func
75
76
- def str_extract(self):
77
+ def str_extract(self, lvalue_formatter):
78
return self.func + '(ctx)'
79
80
def __eq__(self, other):
81
@@ -XXX,XX +XXX,XX @@ def __str__(self):
82
83
def str1(self, i):
84
return str_indent(i) + self.__str__()
85
+
86
+ def output_fields(self, indent, lvalue_formatter):
87
+ for n, f in self.fields.items():
88
+ output(indent, lvalue_formatter(n), ' = ',
89
+ f.str_extract(lvalue_formatter), ';\n')
90
# end General
91
92
93
@@ -XXX,XX +XXX,XX @@ def extract_name(self):
94
def output_extract(self):
95
output('static void ', self.extract_name(), '(DisasContext *ctx, ',
96
self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
97
- for n, f in self.fields.items():
98
- output(' a->', n, ' = ', f.str_extract(), ';\n')
99
+ self.output_fields(str_indent(4), lambda n: 'a->' + n)
100
output('}\n\n')
101
# end Format
102
103
@@ -XXX,XX +XXX,XX @@ def output_code(self, i, extracted, outerbits, outermask):
104
if not extracted:
105
output(ind, self.base.extract_name(),
106
'(ctx, &u.f_', arg, ', insn);\n')
107
- for n, f in self.fields.items():
108
- output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
109
+ self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
110
output(ind, 'if (', translate_prefix, '_', self.name,
111
'(ctx, &u.f_', arg, ')) return true;\n')
112
113
--
114
2.34.1
diff view generated by jsdifflib
New patch
1
From: Peter Maydell <peter.maydell@linaro.org>
1
2
3
To support named fields, we will need to be able to do a topological
4
sort (so that we ensure that we output the assignment to field A
5
before the assignment to field B if field B refers to field A by
6
name). The good news is that there is a tsort in the python standard
7
library; the bad news is that it was only added in Python 3.9.
8
9
To bridge the gap between our current minimum supported Python
10
version and 3.9, provide a local implementation that has the
11
same API as the stdlib version for the parts we care about.
12
In future when QEMU's minimum Python version requirement reaches
13
3.9 we can delete this code and replace it with an 'import' line.
14
15
The core of this implementation is based on
16
https://code.activestate.com/recipes/578272-topological-sort/
17
which is MIT-licensed.
18
19
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
20
Acked-by: Richard Henderson <richard.henderson@linaro.org>
21
Message-Id: <20230523120447.728365-5-peter.maydell@linaro.org>
22
---
23
scripts/decodetree.py | 74 +++++++++++++++++++++++++++++++++++++++++++
24
1 file changed, 74 insertions(+)
25
26
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
27
index XXXXXXX..XXXXXXX 100644
28
--- a/scripts/decodetree.py
29
+++ b/scripts/decodetree.py
30
@@ -XXX,XX +XXX,XX @@
31
re_fmt_ident = '@[a-zA-Z0-9_]*'
32
re_pat_ident = '[a-zA-Z0-9_]*'
33
34
+# Local implementation of a topological sort. We use the same API that
35
+# the Python graphlib does, so that when QEMU moves forward to a
36
+# baseline of Python 3.9 or newer this code can all be dropped and
37
+# replaced with:
38
+# from graphlib import TopologicalSorter, CycleError
39
+#
40
+# https://docs.python.org/3.9/library/graphlib.html#graphlib.TopologicalSorter
41
+#
42
+# We only implement the parts of TopologicalSorter we care about:
43
+# ts = TopologicalSorter(graph=None)
44
+# create the sorter. graph is a dictionary whose keys are
45
+# nodes and whose values are lists of the predecessors of that node.
46
+# (That is, if graph contains "A" -> ["B", "C"] then we must output
47
+# B and C before A.)
48
+# ts.static_order()
49
+# returns a list of all the nodes in sorted order, or raises CycleError
50
+# CycleError
51
+# exception raised if there are cycles in the graph. The second
52
+# element in the args attribute is a list of nodes which form a
53
+# cycle; the first and last element are the same, eg [a, b, c, a]
54
+# (Our implementation doesn't give the order correctly.)
55
+#
56
+# For our purposes we can assume that the data set is always small
57
+# (typically 10 nodes or less, actual links in the graph very rare),
58
+# so we don't need to worry about efficiency of implementation.
59
+#
60
+# The core of this implementation is from
61
+# https://code.activestate.com/recipes/578272-topological-sort/
62
+# (but updated to Python 3), and is under the MIT license.
63
+
64
+class CycleError(ValueError):
65
+ """Subclass of ValueError raised if cycles exist in the graph"""
66
+ pass
67
+
68
+class TopologicalSorter:
69
+ """Topologically sort a graph"""
70
+ def __init__(self, graph=None):
71
+ self.graph = graph
72
+
73
+ def static_order(self):
74
+ # We do the sort right here, unlike the stdlib version
75
+ from functools import reduce
76
+ data = {}
77
+ r = []
78
+
79
+ if not self.graph:
80
+ return []
81
+
82
+ # This code wants the values in the dict to be specifically sets
83
+ for k, v in self.graph.items():
84
+ data[k] = set(v)
85
+
86
+ # Find all items that don't depend on anything.
87
+ extra_items_in_deps = (reduce(set.union, data.values())
88
+ - set(data.keys()))
89
+ # Add empty dependencies where needed
90
+ data.update({item:{} for item in extra_items_in_deps})
91
+ while True:
92
+ ordered = set(item for item, dep in data.items() if not dep)
93
+ if not ordered:
94
+ break
95
+ r.extend(ordered)
96
+ data = {item: (dep - ordered)
97
+ for item, dep in data.items()
98
+ if item not in ordered}
99
+ if data:
100
+ # This doesn't give as nice results as the stdlib, which
101
+ # gives you the cycle by listing the nodes in order. Here
102
+ # we only know the nodes in the cycle but not their order.
103
+ raise CycleError(f'nodes are in a cycle', list(data.keys()))
104
+
105
+ return r
106
+# end TopologicalSorter
107
+
108
def error_with_file(file, lineno, *args):
109
"""Print an error message from file:line and args and exit."""
110
global output_file
111
--
112
2.34.1
diff view generated by jsdifflib
New patch
1
1
From: Peter Maydell <peter.maydell@linaro.org>
2
3
Implement support for named fields, i.e. where one field is defined
4
in terms of another, rather than directly in terms of bits extracted
5
from the instruction.
6
7
The new method referenced_fields() on all the Field classes returns a
8
list of fields that this field references. This just passes through,
9
except for the new NamedField class.
10
11
We can then use referenced_fields() to:
12
* construct a list of 'dangling references' for a format or
13
pattern, which is the fields that the format/pattern uses but
14
doesn't define itself
15
* do a topological sort, so that we output "field = value"
16
assignments in an order that means that we assign a field before
17
we reference it in a subsequent assignment
18
* check when we output the code for a pattern whether we need to
19
fill in the format fields before or after the pattern fields, and
20
do other error checking
21
22
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
23
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
24
Message-Id: <20230523120447.728365-6-peter.maydell@linaro.org>
25
---
26
scripts/decodetree.py | 145 ++++++++++++++++++++++++++++++++++++++++--
27
1 file changed, 139 insertions(+), 6 deletions(-)
28
29
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index XXXXXXX..XXXXXXX 100644
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -XXX,XX +XXX,XX @@ def str_extract(self, lvalue_formatter):
         s = 's' if self.sign else ''
         return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'

+    def referenced_fields(self):
+        return []
+
     def __eq__(self, other):
         return self.sign == other.sign and self.mask == other.mask

@@ -XXX,XX +XXX,XX @@ def str_extract(self, lvalue_formatter):
             pos += f.len
         return ret

+    def referenced_fields(self):
+        l = []
+        for f in self.subs:
+            l.extend(f.referenced_fields())
+        return l
+
     def __ne__(self, other):
         if len(self.subs) != len(other.subs):
             return True
@@ -XXX,XX +XXX,XX @@ def __str__(self):
     def str_extract(self, lvalue_formatter):
         return str(self.value)

+    def referenced_fields(self):
+        return []
+
     def __cmp__(self, other):
         return self.value - other.value
 # end ConstField
@@ -XXX,XX +XXX,XX @@ def str_extract(self, lvalue_formatter):
         return (self.func + '(ctx, '
                 + self.base.str_extract(lvalue_formatter) + ')')

+    def referenced_fields(self):
+        return self.base.referenced_fields()
+
     def __eq__(self, other):
         return self.func == other.func and self.base == other.base

@@ -XXX,XX +XXX,XX @@ def __str__(self):
     def str_extract(self, lvalue_formatter):
         return self.func + '(ctx)'

+    def referenced_fields(self):
+        return []
+
     def __eq__(self, other):
         return self.func == other.func

@@ -XXX,XX +XXX,XX @@ def __ne__(self, other):
         return not self.__eq__(other)
 # end ParameterField

+class NamedField:
+    """Class representing a field already named in the pattern"""
+    def __init__(self, name, sign, len):
+        self.mask = 0
+        self.sign = sign
+        self.len = len
+        self.name = name
+
+    def __str__(self):
+        return self.name
+
+    def str_extract(self, lvalue_formatter):
+        global bitop_width
+        s = 's' if self.sign else ''
+        lvalue = lvalue_formatter(self.name)
+        return f'{s}extract{bitop_width}({lvalue}, 0, {self.len})'
+
+    def referenced_fields(self):
+        return [self.name]
+
+    def __eq__(self, other):
+        return self.name == other.name
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+# end NamedField

 class Arguments:
     """Class representing the extracted fields of a format"""
@@ -XXX,XX +XXX,XX @@ def output_def(self):
         output('} ', self.struct_name(), ';\n\n')
 # end Arguments

-
 class General:
     """Common code between instruction formats and instruction patterns"""
     def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
@@ -XXX,XX +XXX,XX @@ def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
         self.fieldmask = fldm
         self.fields = flds
         self.width = w
+        self.dangling = None

     def __str__(self):
         return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
@@ -XXX,XX +XXX,XX @@ def __str__(self):
     def str1(self, i):
         return str_indent(i) + self.__str__()

+    def dangling_references(self):
+        # Return a list of all named references which aren't satisfied
+        # directly by this format/pattern. This will be either:
+        #  * a format referring to a field which is specified by the
+        #    pattern(s) using it
+        #  * a pattern referring to a field which is specified by the
+        #    format it uses
+        #  * a user error (referring to a field that doesn't exist at all)
+        if self.dangling is None:
+            # Compute this once and cache the answer
+            dangling = []
+            for n, f in self.fields.items():
+                for r in f.referenced_fields():
+                    if r not in self.fields:
+                        dangling.append(r)
+            self.dangling = dangling
+        return self.dangling
+
     def output_fields(self, indent, lvalue_formatter):
+        # We use a topological sort to ensure that any use of NamedField
+        # comes after the initialization of the field it is referencing.
+        graph = {}
         for n, f in self.fields.items():
-            output(indent, lvalue_formatter(n), ' = ',
-                   f.str_extract(lvalue_formatter), ';\n')
+            refs = f.referenced_fields()
+            graph[n] = refs
+
+        try:
+            ts = TopologicalSorter(graph)
+            for n in ts.static_order():
+                # We only want to emit assignments for the keys
+                # in our fields list, not for anything that ends up
+                # in the tsort graph only because it was referenced as
+                # a NamedField.
+                try:
+                    f = self.fields[n]
+                    output(indent, lvalue_formatter(n), ' = ',
+                           f.str_extract(lvalue_formatter), ';\n')
+                except KeyError:
+                    pass
+        except CycleError as e:
+            # The second element of args is a list of nodes which form
+            # a cycle (there might be others too, but only one is reported).
+            # Pretty-print it to tell the user.
+            cycle = ' => '.join(e.args[1])
+            error(self.lineno, 'field definitions form a cycle: ' + cycle)
 # end General


@@ -XXX,XX +XXX,XX @@ def output_code(self, i, extracted, outerbits, outermask):
         ind = str_indent(i)
         arg = self.base.base.name
         output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
+        # We might have named references in the format that refer to fields
+        # in the pattern, or named references in the pattern that refer
+        # to fields in the format. This affects whether we extract the fields
+        # for the format before or after the ones for the pattern.
+        # For simplicity we don't allow cross references in both directions.
+        # This is also where we catch the syntax error of referring to
+        # a nonexistent field.
+        fmt_refs = self.base.dangling_references()
+        for r in fmt_refs:
+            if r not in self.fields:
+                error(self.lineno, f'format refers to undefined field {r}')
+        pat_refs = self.dangling_references()
+        for r in pat_refs:
+            if r not in self.base.fields:
+                error(self.lineno, f'pattern refers to undefined field {r}')
+        if pat_refs and fmt_refs:
+            error(self.lineno, ('pattern that uses fields defined in format '
+                                'cannot use format that uses fields defined '
+                                'in pattern'))
+        if fmt_refs:
+            # pattern fields first
+            self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
+            assert not extracted, "dangling fmt refs but it was already extracted"
         if not extracted:
             output(ind, self.base.extract_name(),
                    '(ctx, &u.f_', arg, ', insn);\n')
-        self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
+        if not fmt_refs:
+            # pattern fields last
+            self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
+
         output(ind, 'if (', translate_prefix, '_', self.name,
                '(ctx, &u.f_', arg, ')) return true;\n')

@@ -XXX,XX +XXX,XX @@ def output_code(self, i, extracted, outerbits, outermask):
         ind = str_indent(i)

         # If we identified all nodes below have the same format,
-        # extract the fields now.
-        if not extracted and self.base:
+        # extract the fields now. But don't do it if the format relies
+        # on named fields from the insn pattern, as those won't have
+        # been initialised at this point.
+        if not extracted and self.base and not self.base.dangling_references():
             output(ind, self.base.extract_name(),
                    '(ctx, &u.f_', self.base.base.name, ', insn);\n')
             extracted = True
@@ -XXX,XX +XXX,XX @@ def parse_field(lineno, name, toks):
     """Parse one instruction field from TOKS at LINENO"""
     global fields
     global insnwidth
+    global re_C_ident

     # A "simple" field will have only one entry;
     # a "multifield" will have several.
@@ -XXX,XX +XXX,XX @@ def parse_field(lineno, name, toks):
             func = func[1]
             continue

+        if re.fullmatch(re_C_ident + ':s[0-9]+', t):
+            # Signed named field
+            subtoks = t.split(':')
+            n = subtoks[0]
+            le = int(subtoks[1])
+            f = NamedField(n, True, le)
+            subs.append(f)
+            width += le
+            continue
+        if re.fullmatch(re_C_ident + ':[0-9]+', t):
+            # Unsigned named field
+            subtoks = t.split(':')
+            n = subtoks[0]
+            le = int(subtoks[1])
+            f = NamedField(n, False, le)
+            subs.append(f)
+            width += le
+            continue
+
         if re.fullmatch('[0-9]+:s[0-9]+', t):
             # Signed field extract
             subtoks = t.split(':s')
--
2.34.1
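
As an illustration of what the new syntax expands to: a small standalone
sketch of NamedField.str_extract() from the patch above, with bitop_width
pinned to 32 and a hypothetical 'a->' lvalue formatter standing in for the
formatters the generator really passes in (the mask/global bookkeeping of
the real class is omitted):

    bitop_width = 32                     # as for a 32-bit insnwidth

    class NamedField:
        def __init__(self, name, sign, len):
            self.sign = sign
            self.len = len
            self.name = name

        def str_extract(self, lvalue_formatter):
            s = 's' if self.sign else ''
            lvalue = lvalue_formatter(self.name)
            return f'{s}extract{bitop_width}({lvalue}, 0, {self.len})'

    # An unsigned 3-bit reference to field 'sz', written "sz:3" in a .decode file:
    print(NamedField('sz', False, 3).str_extract(lambda n: 'a->' + n))
    # -> extract32(a->sz, 0, 3)

With sign=True the same reference would produce sextract32(...), matching
the "name:sNN" form accepted by parse_field().
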
From: Peter Maydell <peter.maydell@linaro.org>

Add some tests for various cases of named-field use, both ones that
should work and ones that should be diagnosed as errors.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230523120447.728365-7-peter.maydell@linaro.org>
---
 tests/decode/err_field10.decode      |  7 +++++++
 tests/decode/err_field7.decode       |  7 +++++++
 tests/decode/err_field8.decode       |  8 ++++++++
 tests/decode/err_field9.decode       | 14 ++++++++++++++
 tests/decode/succ_named_field.decode | 19 +++++++++++++++++++
 tests/decode/meson.build             |  5 +++++
 6 files changed, 60 insertions(+)
 create mode 100644 tests/decode/err_field10.decode
 create mode 100644 tests/decode/err_field7.decode
 create mode 100644 tests/decode/err_field8.decode
 create mode 100644 tests/decode/err_field9.decode
 create mode 100644 tests/decode/succ_named_field.decode

diff --git a/tests/decode/err_field10.decode b/tests/decode/err_field10.decode
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/decode/err_field10.decode
@@ -XXX,XX +XXX,XX @@
+# This work is licensed under the terms of the GNU LGPL, version 2 or later.
+# See the COPYING.LIB file in the top-level directory.
+
+# Diagnose formats which refer to undefined fields
+%field1 field2:3
+@fmt ........ ........ ........ ........ %field1
+insn 00000000 00000000 00000000 00000000 @fmt
diff --git a/tests/decode/err_field7.decode b/tests/decode/err_field7.decode
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/decode/err_field7.decode
@@ -XXX,XX +XXX,XX @@
+# This work is licensed under the terms of the GNU LGPL, version 2 or later.
+# See the COPYING.LIB file in the top-level directory.
+
+# Diagnose fields whose definitions form a loop
+%field1 field2:3
+%field2 field1:4
+insn 00000000 00000000 00000000 00000000 %field1 %field2
diff --git a/tests/decode/err_field8.decode b/tests/decode/err_field8.decode
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/decode/err_field8.decode
@@ -XXX,XX +XXX,XX @@
+# This work is licensed under the terms of the GNU LGPL, version 2 or later.
+# See the COPYING.LIB file in the top-level directory.
+
+# Diagnose patterns which refer to undefined fields
+&f1 f1 a
+%field1 field2:3
+@fmt ........ ........ ........ .... a:4 &f1
+insn 00000000 00000000 00000000 0000 .... @fmt f1=%field1
diff --git a/tests/decode/err_field9.decode b/tests/decode/err_field9.decode
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/decode/err_field9.decode
@@ -XXX,XX +XXX,XX @@
+# This work is licensed under the terms of the GNU LGPL, version 2 or later.
+# See the COPYING.LIB file in the top-level directory.
+
+# Diagnose fields where the format refers to a field defined in the
+# pattern and the pattern refers to a field defined in the format.
+# This is theoretically not impossible to implement, but is not
+# supported by the script at this time.
+&abcd a b c d
+%refa a:3
+%refc c:4
+# Format defines 'c' and sets 'b' to an indirect ref to 'a'
+@fmt ........ ........ ........ c:8 &abcd b=%refa
+# Pattern defines 'a' and sets 'd' to an indirect ref to 'c'
+insn 00000000 00000000 00000000 ........ @fmt d=%refc a=6
diff --git a/tests/decode/succ_named_field.decode b/tests/decode/succ_named_field.decode
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/decode/succ_named_field.decode
@@ -XXX,XX +XXX,XX @@
+# This work is licensed under the terms of the GNU LGPL, version 2 or later.
+# See the COPYING.LIB file in the top-level directory.
+
+# field using a named_field
+%imm_sz    8:8 sz:3
+insn 00000000 00000000 ........ 00000000 imm_sz=%imm_sz sz=1
+
+# Ditto, via a format. Here a field in the format
+# references a named field defined in the insn pattern:
+&imm_a imm alpha
+%foo 0:16 alpha:4
+@foo 00000001 ........ ........ ........ &imm_a imm=%foo
+i1 ........ 00000000 ........ ........ @foo alpha=1
+i2 ........ 00000001 ........ ........ @foo alpha=2
+
+# Here the named field is defined in the format and referenced
+# from the insn pattern:
+@bar 00000010 ........ ........ ........ &imm_a alpha=4
+i3 ........ 00000000 ........ ........ @bar imm=%foo
diff --git a/tests/decode/meson.build b/tests/decode/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/tests/decode/meson.build
+++ b/tests/decode/meson.build
@@ -XXX,XX +XXX,XX @@ err_tests = [
   'err_field4.decode',
   'err_field5.decode',
   'err_field6.decode',
+  'err_field7.decode',
+  'err_field8.decode',
+  'err_field9.decode',
+  'err_field10.decode',
   'err_init1.decode',
   'err_init2.decode',
   'err_init3.decode',
@@ -XXX,XX +XXX,XX @@ succ_tests = [
   'succ_argset_type1.decode',
   'succ_function.decode',
   'succ_ident1.decode',
+  'succ_named_field.decode',
   'succ_pattern_group_nest1.decode',
   'succ_pattern_group_nest2.decode',
   'succ_pattern_group_nest3.decode',
--
2.34.1