The following changes since commit 579510e196a544b42bd8bca9cc61688d4d1211ac:

  Merge tag 'pull-monitor-2023-02-03-v2' of https://repo.or.cz/qemu/armbru into staging (2023-02-04 10:19:55 +0000)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230204

for you to fetch changes up to a2495ede07498ee36b18b03e7038ba30c9871bb2:

  tcg/aarch64: Fix patching of LDR in tb_target_set_jmp_target (2023-02-04 06:19:43 -1000)

----------------------------------------------------------------
tcg: Add support for TCGv_i128 in parameters and returns.
tcg: Add support for TCGv_i128 in cmpxchg.
tcg: Test CPUJumpCache in tb_jmp_cache_clear_page
tcg: Split out tcg_gen_nonatomic_cmpxchg_i{32,64}
tcg/aarch64: Fix patching of LDR in tb_target_set_jmp_target
target/arm: Use tcg_gen_atomic_cmpxchg_i128
target/i386: Use tcg_gen_atomic_cmpxchg_i128
target/i386: Use tcg_gen_nonatomic_cmpxchg_i{32,64}
target/s390x: Use tcg_gen_atomic_cmpxchg_i128
target/s390x: Use TCGv_i128 in passing and returning float128
target/s390x: Implement CC_OP_NZ in gen_op_calc_cc

----------------------------------------------------------------
Eric Auger (1):
      accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page

Ilya Leoshkevich (3):
      tests/tcg/s390x: Add div.c
      tests/tcg/s390x: Add clst.c
      tests/tcg/s390x: Add cdsg.c

Richard Henderson (36):
      tcg: Init temp_subindex in liveness_pass_2
      tcg: Define TCG_TYPE_I128 and related helper macros
      tcg: Handle dh_typecode_i128 with TCG_CALL_{RET,ARG}_NORMAL
      tcg: Allocate objects contiguously in temp_allocate_frame
      tcg: Introduce tcg_out_addi_ptr
      tcg: Add TCG_CALL_{RET,ARG}_BY_REF
      tcg: Introduce tcg_target_call_oarg_reg
      tcg: Add TCG_CALL_RET_BY_VEC
      include/qemu/int128: Use Int128 structure for TCI
      tcg/i386: Add TCG_TARGET_CALL_{RET,ARG}_I128
      tcg/tci: Fix big-endian return register ordering
      tcg/tci: Add TCG_TARGET_CALL_{RET,ARG}_I128
      tcg: Add TCG_TARGET_CALL_{RET,ARG}_I128
      tcg: Add temp allocation for TCGv_i128
      tcg: Add basic data movement for TCGv_i128
      tcg: Add guest load/store primitives for TCGv_i128
      tcg: Add tcg_gen_{non}atomic_cmpxchg_i128
      tcg: Split out tcg_gen_nonatomic_cmpxchg_i{32,64}
      target/arm: Use tcg_gen_atomic_cmpxchg_i128 for STXP
      target/arm: Use tcg_gen_atomic_cmpxchg_i128 for CASP
      target/ppc: Use tcg_gen_atomic_cmpxchg_i128 for STQCX
      tests/tcg/s390x: Add long-double.c
      target/s390x: Use a single return for helper_divs32/u32
      target/s390x: Use a single return for helper_divs64/u64
      target/s390x: Use Int128 for return from CLST
      target/s390x: Use Int128 for return from CKSM
      target/s390x: Use Int128 for return from TRE
      target/s390x: Copy wout_x1 to wout_x1_P
      target/s390x: Use Int128 for returning float128
      target/s390x: Use Int128 for passing float128
      target/s390x: Use tcg_gen_atomic_cmpxchg_i128 for CDSG
      target/s390x: Implement CC_OP_NZ in gen_op_calc_cc
      target/i386: Split out gen_cmpxchg8b, gen_cmpxchg16b
      target/i386: Inline cmpxchg8b
      target/i386: Inline cmpxchg16b
      tcg/aarch64: Fix patching of LDR in tb_target_set_jmp_target

 accel/tcg/tcg-runtime.h          |  11 ++
 include/exec/cpu_ldst.h          |  10 +
 include/exec/helper-head.h       |   7 +
 include/qemu/atomic128.h         |  29 ++-
 include/qemu/int128.h            |  25 ++-
 include/tcg/tcg-op.h             |  15 ++
 include/tcg/tcg.h                |  49 ++++-
 target/arm/helper-a64.h          |   8 -
 target/i386/helper.h             |   6 -
 target/ppc/helper.h              |   2 -
 target/s390x/helper.h            |  54 +++---
 tcg/aarch64/tcg-target.h         |   2 +
 tcg/arm/tcg-target.h             |   2 +
 tcg/i386/tcg-target.h            |  10 +
 tcg/loongarch64/tcg-target.h     |   2 +
 tcg/mips/tcg-target.h            |   2 +
 tcg/riscv/tcg-target.h           |   3 +
 tcg/s390x/tcg-target.h           |   2 +
 tcg/sparc64/tcg-target.h         |   2 +
 tcg/tcg-internal.h               |  17 ++
 tcg/tci/tcg-target.h             |   3 +
 target/s390x/tcg/insn-data.h.inc |  60 +++---
 accel/tcg/cputlb.c               | 119 +++++++++++-
 accel/tcg/user-exec.c            |  66 +++++++
 target/arm/helper-a64.c          | 147 ---------------
 target/arm/translate-a64.c       | 121 ++++++------
 target/i386/tcg/mem_helper.c     | 126 -------------
 target/i386/tcg/translate.c      | 126 +++++++++++--
 target/ppc/mem_helper.c          |  44 -----
 target/ppc/translate.c           | 102 +++++-----
 target/s390x/tcg/fpu_helper.c    | 103 +++++-----
 target/s390x/tcg/int_helper.c    |  64 +++----
 target/s390x/tcg/mem_helper.c    |  77 +-------
 target/s390x/tcg/translate.c     | 212 ++++++++++++++-------
 tcg/tcg-op.c                     | 393 +++++++++++++++++++++++++++++++++------
 tcg/tcg.c                        | 308 ++++++++++++++++++++++++++----
 tcg/tci.c                        |  65 +++----
 tests/tcg/s390x/cdsg.c           |  93 +++++++++
 tests/tcg/s390x/clst.c           |  82 ++++++++
 tests/tcg/s390x/div.c            |  75 ++++++++
 tests/tcg/s390x/long-double.c    |  24 +++
 util/int128.c                    |  42 +++++
 accel/tcg/atomic_common.c.inc    |  45 +++++
 tcg/aarch64/tcg-target.c.inc     |  19 +-
 tcg/arm/tcg-target.c.inc         |  30 ++-
 tcg/i386/tcg-target.c.inc        |  52 +++++-
 tcg/loongarch64/tcg-target.c.inc |  17 +-
 tcg/mips/tcg-target.c.inc        |  17 +-
 tcg/ppc/tcg-target.c.inc         |  20 +-
 tcg/riscv/tcg-target.c.inc       |  17 +-
 tcg/s390x/tcg-target.c.inc       |  16 +-
 tcg/sparc64/tcg-target.c.inc     |  19 +-
 tcg/tci/tcg-target.c.inc         |  27 ++-
 tests/tcg/s390x/Makefile.target  |   7 +
 54 files changed, 2040 insertions(+), 956 deletions(-)
 create mode 100644 tests/tcg/s390x/cdsg.c
 create mode 100644 tests/tcg/s390x/clst.c
 create mode 100644 tests/tcg/s390x/div.c
 create mode 100644 tests/tcg/s390x/long-double.c
From: Eric Auger <eric.auger@redhat.com>

After commit 4e4fa6c12d ("accel/tcg: Complete cpu initialization
before registration"), it looks like the CPUJumpCache pointer can be
NULL. This causes a SIGSEGV when running the debug-wp-migration kvm
unit test.

In the first place it should be clarified why this TCG code is called
with KVM acceleration. This may hide another bug.

Fixes: 4e4fa6c12d ("accel/tcg: Complete cpu initialization before registration")
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Message-Id: <20230203171510.2867451-1-eric.auger@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
 
 static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
 {
-    int i, i0 = tb_jmp_cache_hash_page(page_addr);
     CPUJumpCache *jc = cpu->tb_jmp_cache;
+    int i, i0;
 
+    if (unlikely(!jc)) {
+        return;
+    }
+
+    i0 = tb_jmp_cache_hash_page(page_addr);
     for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
         qatomic_set(&jc->array[i0 + i].tb, NULL);
     }
--
2.34.1

Correctly handle large types while lowering.

Fixes: fac87bd2a49b ("tcg: Add temp_subindex to TCGTemp")
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s)
             TCGTemp *dts = tcg_temp_alloc(s);
             dts->type = its->type;
             dts->base_type = its->base_type;
+            dts->temp_subindex = its->temp_subindex;
             dts->kind = TEMP_EBB;
             its->state_ptr = dts;
         } else {
--
2.34.1

Begin staging in support for TCGv_i128 with Int128.
Define the type enumerator, the typedef, and the
helper-head.h macros.

This cannot yet be used, because you can't allocate
temporaries of this new type.
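
For illustration only (this helper does not exist; it is a
hypothetical example of the new macros at work), a declaration
using the i128 typecode would expand along these lines:

    /* In some target's helper.h:
     *     DEF_HELPER_2(foo, i128, env, i128)
     * with dh_ctype_i128 defined as Int128, yields the prototype:
     */
    Int128 helper_foo(CPUArchState *env, Int128 arg);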

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/helper-head.h |  7 +++++++
 include/tcg/tcg.h          | 17 ++++++++++-------
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/helper-head.h
+++ b/include/exec/helper-head.h
@@ -XXX,XX +XXX,XX @@
 #define dh_alias_int i32
 #define dh_alias_i64 i64
 #define dh_alias_s64 i64
+#define dh_alias_i128 i128
 #define dh_alias_f16 i32
 #define dh_alias_f32 i32
 #define dh_alias_f64 i64
@@ -XXX,XX +XXX,XX @@
 #define dh_ctype_int int
 #define dh_ctype_i64 uint64_t
 #define dh_ctype_s64 int64_t
+#define dh_ctype_i128 Int128
 #define dh_ctype_f16 uint32_t
 #define dh_ctype_f32 float32
 #define dh_ctype_f64 float64
@@ -XXX,XX +XXX,XX @@
 #define dh_retvar_decl0_noreturn void
 #define dh_retvar_decl0_i32 TCGv_i32 retval
 #define dh_retvar_decl0_i64 TCGv_i64 retval
+#define dh_retvar_decl0_i128 TCGv_i128 retval
 #define dh_retvar_decl0_ptr TCGv_ptr retval
 #define dh_retvar_decl0(t) glue(dh_retvar_decl0_, dh_alias(t))
 
@@ -XXX,XX +XXX,XX @@
 #define dh_retvar_decl_noreturn
 #define dh_retvar_decl_i32 TCGv_i32 retval,
 #define dh_retvar_decl_i64 TCGv_i64 retval,
+#define dh_retvar_decl_i128 TCGv_i128 retval,
 #define dh_retvar_decl_ptr TCGv_ptr retval,
 #define dh_retvar_decl(t) glue(dh_retvar_decl_, dh_alias(t))
 
@@ -XXX,XX +XXX,XX @@
 #define dh_retvar_noreturn NULL
 #define dh_retvar_i32 tcgv_i32_temp(retval)
 #define dh_retvar_i64 tcgv_i64_temp(retval)
+#define dh_retvar_i128 tcgv_i128_temp(retval)
 #define dh_retvar_ptr tcgv_ptr_temp(retval)
 #define dh_retvar(t) glue(dh_retvar_, dh_alias(t))
 
@@ -XXX,XX +XXX,XX @@
 #define dh_typecode_i64 4
 #define dh_typecode_s64 5
 #define dh_typecode_ptr 6
+#define dh_typecode_i128 7
 #define dh_typecode_int dh_typecode_s32
 #define dh_typecode_f16 dh_typecode_i32
 #define dh_typecode_f32 dh_typecode_i32
@@ -XXX,XX +XXX,XX @@
 
 #define dh_callflag_i32 0
 #define dh_callflag_i64 0
+#define dh_callflag_i128 0
 #define dh_callflag_ptr 0
 #define dh_callflag_void 0
 #define dh_callflag_noreturn TCG_CALL_NO_RETURN
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ typedef struct TCGPool {
 typedef enum TCGType {
     TCG_TYPE_I32,
     TCG_TYPE_I64,
+    TCG_TYPE_I128,
 
     TCG_TYPE_V64,
     TCG_TYPE_V128,
@@ -XXX,XX +XXX,XX @@ typedef tcg_target_ulong TCGArg;
    in tcg/README. Target CPU front-end code uses these types to deal
    with TCG variables as it emits TCG code via the tcg_gen_* functions.
    They come in several flavours:
-   * TCGv_i32 : 32 bit integer type
-   * TCGv_i64 : 64 bit integer type
-   * TCGv_ptr : a host pointer type
-   * TCGv_vec : a host vector type; the exact size is not exposed
-                to the CPU front-end code.
-   * TCGv : an integer type the same size as target_ulong
-            (an alias for either TCGv_i32 or TCGv_i64)
+   * TCGv_i32  : 32 bit integer type
+   * TCGv_i64  : 64 bit integer type
+   * TCGv_i128 : 128 bit integer type
+   * TCGv_ptr  : a host pointer type
+   * TCGv_vec  : a host vector type; the exact size is not exposed
+                 to the CPU front-end code.
+   * TCGv : an integer type the same size as target_ulong
+            (an alias for either TCGv_i32 or TCGv_i64)
    The compiler's type checking will complain if you mix them
    up and pass the wrong sized TCGv to a function.
 
@@ -XXX,XX +XXX,XX @@ typedef tcg_target_ulong TCGArg;
 
 typedef struct TCGv_i32_d *TCGv_i32;
 typedef struct TCGv_i64_d *TCGv_i64;
+typedef struct TCGv_i128_d *TCGv_i128;
 typedef struct TCGv_ptr_d *TCGv_ptr;
 typedef struct TCGv_vec_d *TCGv_vec;
 typedef TCGv_ptr TCGv_env;
--
2.34.1

Many hosts pass and return 128-bit quantities like sequential
64-bit quantities. Treat this just like we currently break
down 64-bit quantities for a 32-bit host.
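
A worked example of the arithmetic (illustrative, not part of the
patch):

    /* nr_out = 128 / TCG_TARGET_REG_BITS
     *   64-bit host: 128 / 64 = 2 sequential return registers,
     *                just as a 64-bit return uses two registers
     *                on a 32-bit host;
     *   32-bit host: 128 / 32 = 4, matching the new "case 4:"
     *                added to tcg_gen_callN below.
     */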

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c | 37 +++++++++++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
     case dh_typecode_s64:
         info->nr_out = 64 / TCG_TARGET_REG_BITS;
         info->out_kind = TCG_CALL_RET_NORMAL;
+        assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs));
+        break;
+    case dh_typecode_i128:
+        info->nr_out = 128 / TCG_TARGET_REG_BITS;
+        info->out_kind = TCG_CALL_RET_NORMAL; /* TODO */
+        switch (/* TODO */ TCG_CALL_RET_NORMAL) {
+        case TCG_CALL_RET_NORMAL:
+            assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs));
+            break;
+        default:
+            qemu_build_not_reached();
+        }
         break;
     default:
         g_assert_not_reached();
     }
-    assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs));
 
     /*
      * Parse and place function arguments.
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
+       case dh_typecode_i128:
+           type = TCG_TYPE_I128;
+           break;
        default:
            g_assert_not_reached();
        }
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
            }
            break;
 
+       case TCG_TYPE_I128:
+           switch (/* TODO */ TCG_CALL_ARG_NORMAL) {
+           case TCG_CALL_ARG_EVEN:
+               layout_arg_even(&cum);
+               /* fall through */
+           case TCG_CALL_ARG_NORMAL:
+               layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
+               break;
+           default:
+               qemu_build_not_reached();
+           }
+           break;
+
        default:
            g_assert_not_reached();
        }
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
         op->args[pi++] = temp_arg(ret);
         break;
     case 2:
+    case 4:
         tcg_debug_assert(ret != NULL);
-        tcg_debug_assert(ret->base_type == ret->type + 1);
+        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
         tcg_debug_assert(ret->temp_subindex == 0);
-        op->args[pi++] = temp_arg(ret);
-        op->args[pi++] = temp_arg(ret + 1);
+        for (i = 0; i < n; ++i) {
+            op->args[pi++] = temp_arg(ret + i);
+        }
         break;
     default:
         g_assert_not_reached();
--
2.34.1

When allocating a temp to the stack frame, consider the
base type and allocate all parts at once.
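
An illustrative layout, assuming a 64-bit host (my numbers, not
spelled out in the patch): an I128 temp has base_type TCG_TYPE_I128
and two I64 parts, so the new loop assigns

    part 0 (temp_subindex 0): mem_offset = off
    part 1 (temp_subindex 1): mem_offset = off + 8

with both parts sharing one contiguous 16-byte frame object.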

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s)
 
 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
 {
-    int size = tcg_type_size(ts->type);
-    int align;
     intptr_t off;
+    int size, align;
 
-    switch (ts->type) {
+    /* When allocating an object, look at the full type. */
+    size = tcg_type_size(ts->base_type);
+    switch (ts->base_type) {
     case TCG_TYPE_I32:
         align = 4;
         break;
@@ -XXX,XX +XXX,XX @@ static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
         tcg_raise_tb_overflow(s);
     }
     s->current_frame_offset = off + size;
-
-    ts->mem_offset = off;
 #if defined(__sparc__)
-    ts->mem_offset += TCG_TARGET_STACK_BIAS;
+    off += TCG_TARGET_STACK_BIAS;
 #endif
-    ts->mem_base = s->frame_temp;
-    ts->mem_allocated = 1;
+
+    /* If the object was subdivided, assign memory to all the parts. */
+    if (ts->base_type != ts->type) {
+        int part_size = tcg_type_size(ts->type);
+        int part_count = size / part_size;
+
+        /*
+         * Each part is allocated sequentially in tcg_temp_new_internal.
+         * Jump back to the first part by subtracting the current index.
+         */
+        ts -= ts->temp_subindex;
+        for (int i = 0; i < part_count; ++i) {
+            ts[i].mem_offset = off + i * part_size;
+            ts[i].mem_base = s->frame_temp;
+            ts[i].mem_allocated = 1;
+        }
+    } else {
+        ts->mem_offset = off;
+        ts->mem_base = s->frame_temp;
+        ts->mem_allocated = 1;
+    }
 }
 
 /* Assign @reg to @ts, and update reg_to_temp[]. */
--
2.34.1

Implement the function for arm, i386, and s390x, which will use it.
Add stubs for all other backends.
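
Informally, the contract of the new hook (my summary, not the
patch's wording):

    /* tcg_out_addi_ptr(s, rd, rs, imm):
     * emit "rd = rs + imm" with pointer-sized arithmetic.
     * Backends that never pass an i128 by reference may simply
     * assert, as the stubs below do.
     */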

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c                        |  2 ++
 tcg/aarch64/tcg-target.c.inc     |  7 +++++++
 tcg/arm/tcg-target.c.inc         | 20 ++++++++++++++++++++
 tcg/i386/tcg-target.c.inc        |  8 ++++++++
 tcg/loongarch64/tcg-target.c.inc |  7 +++++++
 tcg/mips/tcg-target.c.inc        |  7 +++++++
 tcg/ppc/tcg-target.c.inc         |  7 +++++++
 tcg/riscv/tcg-target.c.inc       |  7 +++++++
 tcg/s390x/tcg-target.c.inc       |  7 +++++++
 tcg/sparc64/tcg-target.c.inc     |  7 +++++++
 tcg/tci/tcg-target.c.inc         |  7 +++++++
 11 files changed, 86 insertions(+)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
 static void tcg_out_movi(TCGContext *s, TCGType type,
                          TCGReg ret, tcg_target_long arg);
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long)
+    __attribute__((unused));
 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
 static void tcg_out_goto_tb(TCGContext *s, int which);
 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
     tcg_out_insn(s, 3305, LDR, 0, rd);
 }
 
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
+                             tcg_target_long imm)
+{
+    /* This function is only used for passing structs by reference. */
+    g_assert_not_reached();
+}
+
 /* Define something more legible for general use. */
 #define tcg_out_ldst_r tcg_out_insn_3310
 
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type,
     tcg_out_movi32(s, COND_AL, ret, arg);
 }
 
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
+                             tcg_target_long imm)
+{
+    int enc, opc = ARITH_ADD;
+
+    /* All of the easiest immediates to encode are positive. */
+    if (imm < 0) {
+        imm = -imm;
+        opc = ARITH_SUB;
+    }
+    enc = encode_imm(imm);
+    if (enc >= 0) {
+        tcg_out_dat_imm(s, COND_AL, opc, rd, rs, enc);
+    } else {
+        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, imm);
+        tcg_out_dat_reg(s, COND_AL, opc, rd, rs,
+                        TCG_REG_TMP, SHIFT_IMM_LSL(0));
+    }
+}
+
 /* Type is always V128, with I64 elements. */
 static void tcg_out_dup2_vec(TCGContext *s, TCGReg rd, TCGReg rl, TCGReg rh)
 {
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type,
     }
 }
 
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
+                             tcg_target_long imm)
+{
+    /* This function is only used for passing structs by reference. */
+    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
+    tcg_out_modrm_offset(s, OPC_LEA, rd, rs, imm);
+}
+
 static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
 {
     if (val == (int8_t)val) {
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addi(TCGContext *s, TCGType type, TCGReg rd,
     }
 }
 
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
+                             tcg_target_long imm)
+{
+    /* This function is only used for passing structs by reference. */
+    g_assert_not_reached();
+}
+
 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg)
 {
     tcg_out_opc_andi(s, ret, arg, 0xff);
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type,
     }
 }
 
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
+                             tcg_target_long imm)
+{
+    /* This function is only used for passing structs by reference. */
+    g_assert_not_reached();
+}
+
 static void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg, int flags)
 {
     /* ret and arg can't be register tmp0 */
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
     }
 }
 
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
+                             tcg_target_long imm)
+{
+    /* This function is only used for passing structs by reference. */
+    g_assert_not_reached();
+}
+
 static bool mask_operand(uint32_t c, int *mb, int *me)
 {
     uint32_t lsb, test;
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
     tcg_out_opc_imm(s, OPC_LD, rd, rd, 0);
 }
 
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
+                             tcg_target_long imm)
+{
+    /* This function is only used for passing structs by reference. */
+    g_assert_not_reached();
+}
+
 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg)
 {
     tcg_out_opc_imm(s, OPC_ANDI, ret, arg, 0xff);
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
     return false;
 }
 
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
+                             tcg_target_long imm)
+{
+    /* This function is only used for passing structs by reference. */
+    tcg_out_mem(s, RX_LA, RXY_LAY, rd, rs, TCG_REG_NONE, imm);
+}
+
 static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
                                  int msb, int lsb, int ofs, int z)
 {
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc64/tcg-target.c.inc
+++ b/tcg/sparc64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type,
     tcg_out_movi_int(s, type, ret, arg, false, TCG_REG_T2);
 }
 
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
+                             tcg_target_long imm)
+{
+    /* This function is only used for passing structs by reference. */
+    g_assert_not_reached();
+}
+
 static void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1,
                             TCGReg a2, int op)
 {
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type,
     }
 }
 
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
+                             tcg_target_long imm)
+{
+    /* This function is only used for passing structs by reference. */
+    g_assert_not_reached();
+}
+
 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func,
                          const TCGHelperInfo *info)
 {
--
2.34.1

These will be used by some hosts, both 32 and 64-bit, to pass and
return i128. Not yet used, because allocation is not yet enabled.
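
A rough sketch of the sequence this enables (illustrative, assuming
a 64-bit host):

    /* For an i128 argument passed TCG_CALL_ARG_BY_REF:
     *   1. both 64-bit halves are stored into a scratch "ref_slot"
     *      area placed after the stack parameters;
     *   2. a pointer to that copy is what the callee actually sees.
     * The copy exists because the callee may clobber by-reference
     * memory, per the comment in layout_arg_by_ref() below.
     */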

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg-internal.h |   3 +
 tcg/tcg.c          | 135 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 135 insertions(+), 3 deletions(-)

diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-internal.h
+++ b/tcg/tcg-internal.h
@@ -XXX,XX +XXX,XX @@
 */
 typedef enum {
     TCG_CALL_RET_NORMAL,         /* by registers */
+    TCG_CALL_RET_BY_REF,         /* for i128, by reference */
 } TCGCallReturnKind;
 
 typedef enum {
@@ -XXX,XX +XXX,XX @@ typedef enum {
     TCG_CALL_ARG_EXTEND,         /* for i32, as a sign/zero-extended i64 */
     TCG_CALL_ARG_EXTEND_U,       /*      ... as a zero-extended i64 */
     TCG_CALL_ARG_EXTEND_S,       /*      ... as a sign-extended i64 */
+    TCG_CALL_ARG_BY_REF,         /* for i128, by reference, first */
+    TCG_CALL_ARG_BY_REF_N,       /*      ... by reference, subsequent */
 } TCGCallArgumentKind;
 
 typedef struct TCGCallArgumentLoc {
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
 static void tcg_out_movi(TCGContext *s, TCGType type,
                          TCGReg ret, tcg_target_long arg);
-static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long)
-    __attribute__((unused));
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
 static void tcg_out_goto_tb(TCGContext *s, int which);
 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
@@ -XXX,XX +XXX,XX @@ static void layout_arg_normal_n(TCGCumulativeArgs *cum,
     cum->arg_slot += n;
 }
 
+static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
+{
+    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
+    int n = 128 / TCG_TARGET_REG_BITS;
+
+    /* The first subindex carries the pointer. */
+    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
+
+    /*
+     * The callee is allowed to clobber memory associated with
+     * structure pass by-reference.  Therefore we must make copies.
+     * Allocate space from "ref_slot", which will be adjusted to
+     * follow the parameters on the stack.
+     */
+    loc[0].ref_slot = cum->ref_slot;
+
+    /*
+     * Subsequent words also go into the reference slot, but
+     * do not accumulate into the regular arguments.
+     */
+    for (int i = 1; i < n; ++i) {
+        loc[i] = (TCGCallArgumentLoc){
+            .kind = TCG_CALL_ARG_BY_REF_N,
+            .arg_idx = cum->arg_idx,
+            .tmp_subindex = i,
+            .ref_slot = cum->ref_slot + i,
+        };
+    }
+    cum->info_in_idx += n;
+    cum->ref_slot += n;
+}
+
 static void init_call_layout(TCGHelperInfo *info)
 {
     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
        case TCG_CALL_RET_NORMAL:
            assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs));
            break;
+       case TCG_CALL_RET_BY_REF:
+           /*
+            * Allocate the first argument to the output.
+            * We don't need to store this anywhere, just make it
+            * unavailable for use in the input loop below.
+            */
+           cum.arg_slot = 1;
+           break;
        default:
            qemu_build_not_reached();
        }
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
+           case TCG_CALL_ARG_BY_REF:
+               layout_arg_by_ref(&cum, info);
+               break;
            default:
                qemu_build_not_reached();
            }
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
     /* Validate the backend has enough argument space. */
     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
-    assert(cum.ref_slot <= max_stk_slots);
+
+    /*
+     * Relocate the "ref_slot" area to the end of the parameters.
+     * Minimizing this stack offset helps code size for x86,
+     * which has a signed 8-bit offset encoding.
+     */
+    if (cum.ref_slot != 0) {
+        int ref_base = 0;
+
+        if (cum.arg_slot > max_reg_slots) {
+            int align = __alignof(Int128) / sizeof(tcg_target_long);
+
+            ref_base = cum.arg_slot - max_reg_slots;
+            if (align > 1) {
+                ref_base = ROUND_UP(ref_base, align);
+            }
+        }
+        assert(ref_base + cum.ref_slot <= max_stk_slots);
+
+        if (ref_base != 0) {
+            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
+                TCGCallArgumentLoc *loc = &info->in[i];
+                switch (loc->kind) {
+                case TCG_CALL_ARG_BY_REF:
+                case TCG_CALL_ARG_BY_REF_N:
+                    loc->ref_slot += ref_base;
+                    break;
+                default:
+                    break;
+                }
+            }
+        }
+    }
 }
 
 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
 
         switch (loc->kind) {
         case TCG_CALL_ARG_NORMAL:
+        case TCG_CALL_ARG_BY_REF:
+        case TCG_CALL_ARG_BY_REF_N:
             op->args[pi++] = temp_arg(ts);
             break;
 
@@ -XXX,XX +XXX,XX @@ static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
     }
 }
 
+static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base,
+                         intptr_t ref_off, TCGRegSet *allocated_regs)
+{
+    TCGReg reg;
+    int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
+
+    if (stk_slot < 0) {
+        reg = tcg_target_call_iarg_regs[arg_slot];
+        tcg_reg_free(s, reg, *allocated_regs);
+        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
+        tcg_regset_set_reg(*allocated_regs, reg);
+    } else {
+        reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
+                            *allocated_regs, 0, false);
+        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
+        tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
+                   TCG_TARGET_CALL_STACK_OFFSET
+                   + stk_slot * sizeof(tcg_target_long));
+    }
+}
+
 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
 {
     const int nb_oargs = TCGOP_CALLO(op);
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
         case TCG_CALL_ARG_EXTEND_S:
             load_arg_normal(s, loc, ts, &allocated_regs);
             break;
+        case TCG_CALL_ARG_BY_REF:
+            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
+            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
+                         TCG_TARGET_CALL_STACK_OFFSET
+                         + loc->ref_slot * sizeof(tcg_target_long),
+                         &allocated_regs);
+            break;
+        case TCG_CALL_ARG_BY_REF_N:
+            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
+            break;
         default:
             g_assert_not_reached();
         }
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
         save_globals(s, allocated_regs);
     }
 
+    /*
+     * If the ABI passes a pointer to the returned struct as the first
+     * argument, load that now.  Pass a pointer to the output home slot.
+     */
+    if (info->out_kind == TCG_CALL_RET_BY_REF) {
+        TCGTemp *ts = arg_temp(op->args[0]);
+
+        if (!ts->mem_allocated) {
+            temp_allocate_frame(s, ts);
+        }
+        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
+    }
+
     tcg_out_call(s, tcg_call_func(op), info);
 
     /* Assign output registers and emit moves if needed. */
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
             ts->mem_coherent = 0;
         }
         break;
+
+    case TCG_CALL_RET_BY_REF:
+        /* The callee has performed a write through the reference. */
+        for (i = 0; i < nb_oargs; i++) {
+            TCGTemp *ts = arg_temp(op->args[i]);
+            ts->val_type = TEMP_VAL_MEM;
+        }
+        break;
+
     default:
         g_assert_not_reached();
     }
--
2.34.1

Replace the flat array tcg_target_call_oarg_regs[] with
a function call including the TCGCallReturnKind.

Extend the set of registers for ARM to r0-r3 to match the ABI:
https://github.com/ARM-software/abi-aa/blob/main/aapcs32/aapcs32.rst#result-return
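
Usage is a straight mapping from return slot to host register; for
example, from the aarch64 hunk below:

    tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0)   /* TCG_REG_X0 */
    tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 1)   /* TCG_REG_X1 */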

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c                        |  9 ++++++---
 tcg/aarch64/tcg-target.c.inc     | 10 +++++++---
 tcg/arm/tcg-target.c.inc         | 10 +++++++---
 tcg/i386/tcg-target.c.inc        | 16 ++++++++++------
 tcg/loongarch64/tcg-target.c.inc | 10 ++++++----
 tcg/mips/tcg-target.c.inc        | 10 ++++++----
 tcg/ppc/tcg-target.c.inc         | 10 ++++++----
 tcg/riscv/tcg-target.c.inc       | 10 ++++++----
 tcg/s390x/tcg-target.c.inc       |  9 ++++++---
 tcg/sparc64/tcg-target.c.inc     | 12 ++++++------
 tcg/tci/tcg-target.c.inc         | 12 ++++++------
 11 files changed, 72 insertions(+), 46 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                         TCGReg base, intptr_t ofs);
 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                          const TCGHelperInfo *info);
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
 #ifdef TCG_TARGET_NEED_LDST_LABELS
 static int tcg_out_ldst_finalize(TCGContext *s);
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
     case dh_typecode_s64:
         info->nr_out = 64 / TCG_TARGET_REG_BITS;
         info->out_kind = TCG_CALL_RET_NORMAL;
-        assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs));
+        /* Query the last register now to trigger any assert early. */
+        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
         break;
     case dh_typecode_i128:
         info->nr_out = 128 / TCG_TARGET_REG_BITS;
         info->out_kind = TCG_CALL_RET_NORMAL; /* TODO */
         switch (/* TODO */ TCG_CALL_RET_NORMAL) {
         case TCG_CALL_RET_NORMAL:
-            assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs));
+            /* Query the last register now to trigger any assert early. */
+            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
             break;
         case TCG_CALL_RET_BY_REF:
             /*
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
     case TCG_CALL_RET_NORMAL:
         for (i = 0; i < nb_oargs; i++) {
             TCGTemp *ts = arg_temp(op->args[i]);
-            TCGReg reg = tcg_target_call_oarg_regs[i];
+            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
 
             /* ENV should not be modified. */
             tcg_debug_assert(!temp_readonly(ts));
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_iarg_regs[8] = {
     TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
     TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
 };
-static const int tcg_target_call_oarg_regs[1] = {
-    TCG_REG_X0
-};
+
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
+{
+    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
+    tcg_debug_assert(slot >= 0 && slot <= 1);
+    return TCG_REG_X0 + slot;
+}
 
 #define TCG_REG_TMP TCG_REG_X30
 #define TCG_VEC_TMP TCG_REG_V31
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
 static const int tcg_target_call_iarg_regs[4] = {
     TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
 };
-static const int tcg_target_call_oarg_regs[2] = {
-    TCG_REG_R0, TCG_REG_R1
-};
+
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
+{
+    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
+    tcg_debug_assert(slot >= 0 && slot <= 3);
+    return TCG_REG_R0 + slot;
+}
 
 #define TCG_REG_TMP TCG_REG_R12
 #define TCG_VEC_TMP TCG_REG_Q15
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_iarg_regs[] = {
 #endif
 };
 
-static const int tcg_target_call_oarg_regs[] = {
-    TCG_REG_EAX,
-#if TCG_TARGET_REG_BITS == 32
-    TCG_REG_EDX
-#endif
-};
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
+{
+    switch (kind) {
+    case TCG_CALL_RET_NORMAL:
+        tcg_debug_assert(slot >= 0 && slot <= 1);
+        return slot ? TCG_REG_EDX : TCG_REG_EAX;
+    default:
+        g_assert_not_reached();
+    }
+}
 
 /* Constants we accept. */
 #define TCG_CT_CONST_S32 0x100
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_iarg_regs[] = {
     TCG_REG_A7,
 };
 
-static const int tcg_target_call_oarg_regs[] = {
-    TCG_REG_A0,
-    TCG_REG_A1,
-};
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
+{
+    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
+    tcg_debug_assert(slot >= 0 && slot <= 1);
+    return TCG_REG_A0 + slot;
+}
 
 #ifndef CONFIG_SOFTMMU
 #define USE_GUEST_BASE (guest_base != 0)
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const TCGReg tcg_target_call_iarg_regs[] = {
 #endif
 };
 
-static const TCGReg tcg_target_call_oarg_regs[2] = {
-    TCG_REG_V0,
-    TCG_REG_V1
-};
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
+{
+    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
+    tcg_debug_assert(slot >= 0 && slot <= 1);
+    return TCG_REG_V0 + slot;
+}
 
 static const tcg_insn_unit *tb_ret_addr;
 static const tcg_insn_unit *bswap32_addr;
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_iarg_regs[] = {
     TCG_REG_R10
 };
 
-static const int tcg_target_call_oarg_regs[] = {
-    TCG_REG_R3,
-    TCG_REG_R4
-};
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
+{
+    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
+    tcg_debug_assert(slot >= 0 && slot <= 1);
+    return TCG_REG_R3 + slot;
+}
 
 static const int tcg_target_callee_save_regs[] = {
 #ifdef _CALL_DARWIN
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_iarg_regs[] = {
     TCG_REG_A7,
 };
 
-static const int tcg_target_call_oarg_regs[] = {
-    TCG_REG_A0,
-    TCG_REG_A1,
-};
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
+{
+    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
+    tcg_debug_assert(slot >= 0 && slot <= 1);
+    return TCG_REG_A0 + slot;
+}
 
 #define TCG_CT_CONST_ZERO 0x100
 #define TCG_CT_CONST_S12 0x200
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_iarg_regs[] = {
     TCG_REG_R6,
 };
 
-static const int tcg_target_call_oarg_regs[] = {
-    TCG_REG_R2,
-};
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
+{
+    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
+    tcg_debug_assert(slot == 0);
+    return TCG_REG_R2;
+}
 
 #define S390_CC_EQ 8
 #define S390_CC_LT 4
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc64/tcg-target.c.inc
+++ b/tcg/sparc64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_iarg_regs[6] = {
     TCG_REG_O5,
 };
 
-static const int tcg_target_call_oarg_regs[] = {
-    TCG_REG_O0,
-    TCG_REG_O1,
-    TCG_REG_O2,
-    TCG_REG_O3,
-};
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
+{
+    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
+    tcg_debug_assert(slot >= 0 && slot <= 3);
+    return TCG_REG_O0 + slot;
+}
 
 #define INSN_OP(x)  ((x) << 30)
 #define INSN_OP2(x) ((x) << 22)
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
 /* No call arguments via registers.  All will be stored on the "stack". */
 static const int tcg_target_call_iarg_regs[] = { };
 
-static const int tcg_target_call_oarg_regs[] = {
-    TCG_REG_R0,
-#if TCG_TARGET_REG_BITS == 32
-    TCG_REG_R1
-#endif
-};
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
+{
+    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
+    tcg_debug_assert(slot >= 0 && slot < 64 / TCG_TARGET_REG_BITS);
+    return TCG_REG_R0 + slot;
+}
 
 #ifdef CONFIG_DEBUG_TCG
 static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
--
2.34.1

This will be used by _WIN64 to return i128. Not yet used,
because allocation is not yet enabled.
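
The shape of the mechanism, informally:

    /* TCG_CALL_RET_BY_VEC (Windows x64 uses XMM0; see the i386
     * patch later in the series):
     *   1. the callee returns the i128 in a vector register;
     *   2. the caller stores that V128 to the temp's stack slot;
     *   3. both halves are then "in memory", reusing the
     *      TCG_CALL_RET_BY_REF bookkeeping (the fall through below).
     */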

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg-internal.h |  1 +
 tcg/tcg.c          | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-internal.h
+++ b/tcg/tcg-internal.h
@@ -XXX,XX +XXX,XX @@
 typedef enum {
     TCG_CALL_RET_NORMAL,         /* by registers */
     TCG_CALL_RET_BY_REF,         /* for i128, by reference */
+    TCG_CALL_RET_BY_VEC,         /* for i128, by vector register */
 } TCGCallReturnKind;
 
 typedef enum {
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
+       case TCG_CALL_RET_BY_VEC:
+           /* Query the single register now to trigger any assert early. */
+           tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
+           break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
         }
         break;
 
+    case TCG_CALL_RET_BY_VEC:
+        {
+            TCGTemp *ts = arg_temp(op->args[0]);
+
+            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
+            tcg_debug_assert(ts->temp_subindex == 0);
+            if (!ts->mem_allocated) {
+                temp_allocate_frame(s, ts);
+            }
+            tcg_out_st(s, TCG_TYPE_V128,
+                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
+                       ts->mem_base->reg, ts->mem_offset);
+        }
+        /* fall through to mark all parts in memory */
+
     case TCG_CALL_RET_BY_REF:
         /* The callee has performed a write through the reference. */
         for (i = 0; i < nb_oargs; i++) {
--
2.34.1

We are about to allow passing Int128 to/from tcg helper functions,
but libffi doesn't support __int128_t, so use the structure.

In order for atomic128.h to continue working, we must provide
a mechanism to frob between real __int128_t and the structure.
Provide a new union, Int128Alias, for this. We cannot modify
Int128 itself, as any changed alignment would also break libffi.
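
The frobbing idiom, pulled out for clarity (this mirrors the hunks
below exactly):

    Int128Alias r, c, n;

    c.s = cmp;        /* view the values as the struct ...  */
    n.s = new;
    r.i = qatomic_cmpxchg__nocheck((__int128_t *)ptr, c.i, n.i);
    return r.s;       /* ... or as the native type, freely  */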

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/qemu/atomic128.h | 29 +++++++++++++++++++++------
 include/qemu/int128.h    | 25 +++++++++++++++++++++---
 util/int128.c            | 42 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 87 insertions(+), 9 deletions(-)

diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/atomic128.h
+++ b/include/qemu/atomic128.h
@@ -XXX,XX +XXX,XX @@
 #if defined(CONFIG_ATOMIC128)
 static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
 {
-    return qatomic_cmpxchg__nocheck(ptr, cmp, new);
+    Int128Alias r, c, n;
+
+    c.s = cmp;
+    n.s = new;
+    r.i = qatomic_cmpxchg__nocheck((__int128_t *)ptr, c.i, n.i);
+    return r.s;
 }
 # define HAVE_CMPXCHG128 1
 #elif defined(CONFIG_CMPXCHG128)
 static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
 {
-    return __sync_val_compare_and_swap_16(ptr, cmp, new);
+    Int128Alias r, c, n;
+
+    c.s = cmp;
+    n.s = new;
+    r.i = __sync_val_compare_and_swap_16((__int128_t *)ptr, c.i, n.i);
+    return r.s;
 }
 # define HAVE_CMPXCHG128 1
 #elif defined(__aarch64__)
@@ -XXX,XX +XXX,XX @@ Int128 QEMU_ERROR("unsupported atomic")
 #if defined(CONFIG_ATOMIC128)
 static inline Int128 atomic16_read(Int128 *ptr)
 {
-    return qatomic_read__nocheck(ptr);
+    Int128Alias r;
+
+    r.i = qatomic_read__nocheck((__int128_t *)ptr);
+    return r.s;
 }
 
 static inline void atomic16_set(Int128 *ptr, Int128 val)
 {
-    qatomic_set__nocheck(ptr, val);
+    Int128Alias v;
+
+    v.s = val;
+    qatomic_set__nocheck((__int128_t *)ptr, v.i);
 }
 
 # define HAVE_ATOMIC128 1
@@ -XXX,XX +XXX,XX @@ static inline void atomic16_set(Int128 *ptr, Int128 val)
 static inline Int128 atomic16_read(Int128 *ptr)
 {
     /* Maybe replace 0 with 0, returning the old value. */
-    return atomic16_cmpxchg(ptr, 0, 0);
+    Int128 z = int128_make64(0);
+    return atomic16_cmpxchg(ptr, z, z);
 }
 
 static inline void atomic16_set(Int128 *ptr, Int128 val)
@@ -XXX,XX +XXX,XX @@ static inline void atomic16_set(Int128 *ptr, Int128 val)
     do {
         cmp = old;
         old = atomic16_cmpxchg(ptr, cmp, val);
-    } while (old != cmp);
+    } while (int128_ne(old, cmp));
 }
 
 # define HAVE_ATOMIC128 1
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -XXX,XX +XXX,XX @@
 
 #include "qemu/bswap.h"
 
-#ifdef CONFIG_INT128
+/*
+ * With TCI, we need to use libffi for interfacing with TCG helpers.
+ * But libffi does not support __int128_t, and therefore cannot pass
+ * or return values of this type, force use of the Int128 struct.
+ */
+#if defined(CONFIG_INT128) && !defined(CONFIG_TCG_INTERPRETER)
 typedef __int128_t Int128;
 
 static inline Int128 int128_make64(uint64_t a)
@@ -XXX,XX +XXX,XX @@ Int128 int128_divu(Int128, Int128);
 Int128 int128_remu(Int128, Int128);
 Int128 int128_divs(Int128, Int128);
 Int128 int128_rems(Int128, Int128);
-
-#endif /* CONFIG_INT128 */
+#endif /* CONFIG_INT128 && !CONFIG_TCG_INTERPRETER */
 
 static inline void bswap128s(Int128 *s)
 {
@@ -XXX,XX +XXX,XX @@ static inline void bswap128s(Int128 *s)
 #define INT128_MAX int128_make128(UINT64_MAX, INT64_MAX)
 #define INT128_MIN int128_make128(0, INT64_MIN)
 
+/*
+ * When compiler supports a 128-bit type, define a combination of
+ * a possible structure and the native types.  Ease parameter passing
+ * via use of the transparent union extension.
+ */
+#ifdef CONFIG_INT128
+typedef union {
+    Int128 s;
+    __int128_t i;
+    __uint128_t u;
+} Int128Alias __attribute__((transparent_union));
+#else
+typedef Int128 Int128Alias;
+#endif /* CONFIG_INT128 */
+
 #endif /* INT128_H */
diff --git a/util/int128.c b/util/int128.c
index XXXXXXX..XXXXXXX 100644
--- a/util/int128.c
+++ b/util/int128.c
@@ -XXX,XX +XXX,XX @@ Int128 int128_rems(Int128 a, Int128 b)
     return r;
 }
 
+#elif defined(CONFIG_TCG_INTERPRETER)
+
+Int128 int128_divu(Int128 a_s, Int128 b_s)
+{
+    Int128Alias r, a, b;
+
+    a.s = a_s;
+    b.s = b_s;
+    r.u = a.u / b.u;
+    return r.s;
+}
+
+Int128 int128_remu(Int128 a_s, Int128 b_s)
+{
+    Int128Alias r, a, b;
+
+    a.s = a_s;
+    b.s = b_s;
+    r.u = a.u % b.u;
+    return r.s;
+}
+
+Int128 int128_divs(Int128 a_s, Int128 b_s)
+{
+    Int128Alias r, a, b;
+
+    a.s = a_s;
+    b.s = b_s;
+    r.i = a.i / b.i;
+    return r.s;
+}
+
+Int128 int128_rems(Int128 a_s, Int128 b_s)
+{
+    Int128Alias r, a, b;
+
+    a.s = a_s;
+    b.s = b_s;
+    r.i = a.i % b.i;
+    return r.s;
+}
+
 #endif
--
2.34.1

Fill in the parameters for the host ABI for Int128.
Adjust tcg_target_call_oarg_reg for _WIN64, and
tcg_out_call for i386 sysv. Allow TCG_TYPE_V128
stores without AVX enabled.
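
Background on the i386 sysv quirk handled below (my summary, not the
patch's wording): a function returning a 16-byte struct receives a
hidden pointer on the stack and removes it itself, e.g.

    ret $4    /* callee pops the hidden struct-return pointer */

so after the call the caller's stack pointer is one word short;
pushing a scratch word restores it.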
1
5
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/i386/tcg-target.h | 10 ++++++++++
10
tcg/i386/tcg-target.c.inc | 30 +++++++++++++++++++++++++++++-
11
2 files changed, 39 insertions(+), 1 deletion(-)
12
13
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/i386/tcg-target.h
16
+++ b/tcg/i386/tcg-target.h
17
@@ -XXX,XX +XXX,XX @@ typedef enum {
18
#endif
19
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
20
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
21
+#if defined(_WIN64)
22
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF
23
+# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_VEC
24
+#elif TCG_TARGET_REG_BITS == 64
25
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
26
+# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
27
+#else
28
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
29
+# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
30
+#endif
31
32
extern bool have_bmi1;
33
extern bool have_popcnt;
34
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
35
index XXXXXXX..XXXXXXX 100644
36
--- a/tcg/i386/tcg-target.c.inc
37
+++ b/tcg/i386/tcg-target.c.inc
38
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
39
case TCG_CALL_RET_NORMAL:
40
tcg_debug_assert(slot >= 0 && slot <= 1);
41
return slot ? TCG_REG_EDX : TCG_REG_EAX;
42
+#ifdef _WIN64
43
+ case TCG_CALL_RET_BY_VEC:
44
+ tcg_debug_assert(slot == 0);
45
+ return TCG_REG_XMM0;
46
+#endif
47
default:
48
g_assert_not_reached();
49
}
50
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
51
* The gvec infrastructure is asserts that v128 vector loads
52
* and stores use a 16-byte aligned offset. Validate that the
53
* final pointer is aligned by using an insn that will SIGSEGV.
54
+ *
55
+ * This specific instance is also used by TCG_CALL_RET_BY_VEC,
56
+ * for _WIN64, which must have SSE2 but may not have AVX.
57
*/
58
tcg_debug_assert(arg >= 16);
59
- tcg_out_vex_modrm_offset(s, OPC_MOVDQA_WxVx, arg, 0, arg1, arg2);
60
+ if (have_avx1) {
61
+ tcg_out_vex_modrm_offset(s, OPC_MOVDQA_WxVx, arg, 0, arg1, arg2);
62
+ } else {
63
+ tcg_out_modrm_offset(s, OPC_MOVDQA_WxVx, arg, arg1, arg2);
64
+ }
65
break;
66
case TCG_TYPE_V256:
67
/*
68
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
69
const TCGHelperInfo *info)
70
{
71
tcg_out_branch(s, 1, dest);
72
+
73
+#ifndef _WIN32
74
+ if (TCG_TARGET_REG_BITS == 32 && info->out_kind == TCG_CALL_RET_BY_REF) {
75
+ /*
76
+ * The sysv i386 abi for struct return places a reference as the
77
+ * first argument of the stack, and pops that argument with the
78
+ * return statement. Since we want to retain the aligned stack
79
+ * pointer for the callee, we do not want to actually push that
80
+ * argument before the call but rely on the normal store to the
81
+ * stack slot. But we do need to compensate for the pop in order
82
+ * to reset our correct stack pointer value.
83
+ * Pushing a garbage value back onto the stack is quickest.
84
+ */
85
+ tcg_out_push(s, TCG_REG_EAX);
86
+ }
87
+#endif
88
}
89
90
static void tcg_out_jmp(TCGContext *s, const tcg_insn_unit *dest)
91
--
92
2.34.1
93
94
diff view generated by jsdifflib
New patch
1
We expect the backend to require register pairs in
2
host-endian ordering, thus for big-endian the first
3
register of a pair contains the high part.
4
We were forcing R0 to contain the low part for calls.
1
5
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
10
tcg/tci.c | 21 +++++++++++----------
11
1 file changed, 11 insertions(+), 10 deletions(-)
12
13
diff --git a/tcg/tci.c b/tcg/tci.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/tci.c
16
+++ b/tcg/tci.c
17
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
18
ffi_call(pptr[1], pptr[0], stack, call_slots);
19
}
20
21
- /* Any result winds up "left-aligned" in the stack[0] slot. */
22
switch (len) {
23
case 0: /* void */
24
break;
25
case 1: /* uint32_t */
26
/*
27
+ * The result winds up "left-aligned" in the stack[0] slot.
28
* Note that libffi has an odd special case in that it will
29
* always widen an integral result to ffi_arg.
30
*/
31
- if (sizeof(ffi_arg) == 4) {
32
- regs[TCG_REG_R0] = *(uint32_t *)stack;
33
- break;
34
- }
35
- /* fall through */
36
- case 2: /* uint64_t */
37
- if (TCG_TARGET_REG_BITS == 32) {
38
- tci_write_reg64(regs, TCG_REG_R1, TCG_REG_R0, stack[0]);
39
+ if (sizeof(ffi_arg) == 8) {
40
+ regs[TCG_REG_R0] = (uint32_t)stack[0];
41
} else {
42
- regs[TCG_REG_R0] = stack[0];
43
+ regs[TCG_REG_R0] = *(uint32_t *)stack;
44
}
45
break;
46
+ case 2: /* uint64_t */
47
+ /*
48
+ * For TCG_TARGET_REG_BITS == 32, the register pair
49
+ * must stay in host memory order.
50
+ */
51
+ memcpy(&regs[TCG_REG_R0], stack, 8);
52
+ break;
53
default:
54
g_assert_not_reached();
55
}
56
--
57
2.34.1
58
59
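The memcpy in the uint64_t case works because a byte copy preserves host
memory order across the register pair; a standalone sketch of the idea
(illustrative only, write_reg_pair is not a QEMU function):

    #include <stdint.h>
    #include <string.h>

    /*
     * On a 32-bit host a 64-bit result spans two consecutive registers.
     * Copying bytes keeps host order: little-endian puts the low half
     * in regs[0], big-endian puts the high half there -- exactly the
     * pairing the backend expects.
     */
    static void write_reg_pair(uint32_t regs[2], uint64_t val)
    {
        memcpy(regs, &val, sizeof(val));
    }
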
Fill in the parameters for libffi for Int128.
Adjust the interpreter to allow for 16-byte return values.
Adjust tcg_out_call to record the return value length.

Call parameters are no longer all the same size, so we
cannot reuse the same call_slots array for every function.
Compute it each time now, but only fill in slots required
for the call we're about to make.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tci/tcg-target.h | 3 +++
tcg/tcg.c | 19 +++++++++++++++++
tcg/tci.c | 44 ++++++++++++++++++++--------------------
tcg/tci/tcg-target.c.inc | 10 ++++-----
4 files changed, 49 insertions(+), 27 deletions(-)

diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
#if TCG_TARGET_REG_BITS == 32
# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EVEN
# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
#else
# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
#endif
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL

#define HAVE_TCG_QEMU_TB_EXEC
#define TCG_TARGET_NEED_POOL_LABELS
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static GHashTable *helper_table;
#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
+ /*
+ * libffi does not support __int128_t, so we have forced Int128
+ * to use the structure definition instead of the builtin type.
+ */
+ static ffi_type *ffi_type_i128_elements[3] = {
+ &ffi_type_uint64,
+ &ffi_type_uint64,
+ NULL
+ };
+ static ffi_type ffi_type_i128 = {
+ .size = 16,
+ .alignment = __alignof__(Int128),
+ .type = FFI_TYPE_STRUCT,
+ .elements = ffi_type_i128_elements,
+ };
+
switch (argmask) {
case dh_typecode_void:
return &ffi_type_void;
@@ -XXX,XX +XXX,XX @@ static ffi_type *typecode_to_ffi(int argmask)
return &ffi_type_sint64;
case dh_typecode_ptr:
return &ffi_type_pointer;
+ case dh_typecode_i128:
+ return &ffi_type_i128;
}
g_assert_not_reached();
}
@@ -XXX,XX +XXX,XX @@ static void init_ffi_layouts(void)
/* Ignoring the return type, find the last non-zero field. */
nargs = 32 - clz32(typemask >> 3);
nargs = DIV_ROUND_UP(nargs, 3);
+ assert(nargs <= MAX_CALL_IARGS);

ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
ca->cif.rtype = typecode_to_ffi(typemask & 7);
diff --git a/tcg/tci.c b/tcg/tci.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
tcg_target_ulong regs[TCG_TARGET_NB_REGS];
uint64_t stack[(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE)
/ sizeof(uint64_t)];
- void *call_slots[TCG_STATIC_CALL_ARGS_SIZE / sizeof(uint64_t)];

regs[TCG_AREG0] = (tcg_target_ulong)env;
regs[TCG_REG_CALL_STACK] = (uintptr_t)stack;
- /* Other call_slots entries initialized at first use (see below). */
- call_slots[0] = NULL;
tci_assert(tb_ptr);

for (;;) {
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,

switch (opc) {
case INDEX_op_call:
- /*
- * Set up the ffi_avalue array once, delayed until now
- * because many TB's do not make any calls. In tcg_gen_callN,
- * we arranged for every real argument to be "left-aligned"
- * in each 64-bit slot.
- */
- if (unlikely(call_slots[0] == NULL)) {
- for (int i = 0; i < ARRAY_SIZE(call_slots); ++i) {
- call_slots[i] = &stack[i];
- }
- }
-
- tci_args_nl(insn, tb_ptr, &len, &ptr);
-
- /* Helper functions may need to access the "return address" */
- tci_tb_ptr = (uintptr_t)tb_ptr;
-
{
- void **pptr = ptr;
- ffi_call(pptr[1], pptr[0], stack, call_slots);
+ void *call_slots[MAX_CALL_IARGS];
+ ffi_cif *cif;
+ void *func;
+ unsigned i, s, n;
+
+ tci_args_nl(insn, tb_ptr, &len, &ptr);
+ func = ((void **)ptr)[0];
+ cif = ((void **)ptr)[1];
+
+ n = cif->nargs;
+ for (i = s = 0; i < n; ++i) {
+ ffi_type *t = cif->arg_types[i];
+ call_slots[i] = &stack[s];
+ s += DIV_ROUND_UP(t->size, 8);
+ }
+
+ /* Helper functions may need to access the "return address" */
+ tci_tb_ptr = (uintptr_t)tb_ptr;
+ ffi_call(cif, func, stack, call_slots);
}

switch (len) {
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
*/
memcpy(&regs[TCG_REG_R0], stack, 8);
break;
+ case 3: /* Int128 */
+ memcpy(&regs[TCG_REG_R0], stack, 16);
+ break;
default:
g_assert_not_reached();
}
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_iarg_regs[] = { };
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
- tcg_debug_assert(slot >= 0 && slot < 64 / TCG_TARGET_REG_BITS);
+ tcg_debug_assert(slot >= 0 && slot < 128 / TCG_TARGET_REG_BITS);
return TCG_REG_R0 + slot;
}

@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func,

if (cif->rtype == &ffi_type_void) {
which = 0;
- } else if (cif->rtype->size == 4) {
- which = 1;
} else {
- tcg_debug_assert(cif->rtype->size == 8);
- which = 2;
+ tcg_debug_assert(cif->rtype->size == 4 ||
+ cif->rtype->size == 8 ||
+ cif->rtype->size == 16);
+ which = ctz32(cif->rtype->size) - 1;
}
new_pool_l2(s, 20, s->code_ptr, 0, (uintptr_t)func, (uintptr_t)cif);
insn = deposit32(insn, 0, 8, INDEX_op_call);
--
2.34.1

186
diff view generated by jsdifflib
New patch
1
Fill in the parameters for the host ABI for Int128 for
2
those backends which require no extra modification.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/aarch64/tcg-target.h | 2 ++
9
tcg/arm/tcg-target.h | 2 ++
10
tcg/loongarch64/tcg-target.h | 2 ++
11
tcg/mips/tcg-target.h | 2 ++
12
tcg/riscv/tcg-target.h | 3 +++
13
tcg/s390x/tcg-target.h | 2 ++
14
tcg/sparc64/tcg-target.h | 2 ++
15
tcg/tcg.c | 6 +++---
16
tcg/ppc/tcg-target.c.inc | 3 +++
17
9 files changed, 21 insertions(+), 3 deletions(-)
18
19
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
20
index XXXXXXX..XXXXXXX 100644
21
--- a/tcg/aarch64/tcg-target.h
22
+++ b/tcg/aarch64/tcg-target.h
23
@@ -XXX,XX +XXX,XX @@ typedef enum {
24
#define TCG_TARGET_CALL_STACK_OFFSET 0
25
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
26
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
27
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
28
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
29
30
/* optional instructions */
31
#define TCG_TARGET_HAS_div_i32 1
32
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tcg/arm/tcg-target.h
35
+++ b/tcg/arm/tcg-target.h
36
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
37
#define TCG_TARGET_CALL_STACK_OFFSET    0
38
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
39
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
40
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
41
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
42
43
/* optional instructions */
44
#define TCG_TARGET_HAS_ext8s_i32 1
45
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
46
index XXXXXXX..XXXXXXX 100644
47
--- a/tcg/loongarch64/tcg-target.h
48
+++ b/tcg/loongarch64/tcg-target.h
49
@@ -XXX,XX +XXX,XX @@ typedef enum {
50
#define TCG_TARGET_CALL_STACK_OFFSET 0
51
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
52
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
53
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
54
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
55
56
/* optional instructions */
57
#define TCG_TARGET_HAS_movcond_i32 1
58
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
59
index XXXXXXX..XXXXXXX 100644
60
--- a/tcg/mips/tcg-target.h
61
+++ b/tcg/mips/tcg-target.h
62
@@ -XXX,XX +XXX,XX @@ typedef enum {
63
# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
64
#endif
65
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
66
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
67
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
68
69
/* MOVN/MOVZ instructions detection */
70
#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
71
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
72
index XXXXXXX..XXXXXXX 100644
73
--- a/tcg/riscv/tcg-target.h
74
+++ b/tcg/riscv/tcg-target.h
75
@@ -XXX,XX +XXX,XX @@ typedef enum {
76
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
77
#if TCG_TARGET_REG_BITS == 32
78
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
79
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
80
#else
81
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
82
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
83
#endif
84
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
85
86
/* optional instructions */
87
#define TCG_TARGET_HAS_movcond_i32 0
88
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
89
index XXXXXXX..XXXXXXX 100644
90
--- a/tcg/s390x/tcg-target.h
91
+++ b/tcg/s390x/tcg-target.h
92
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
93
#define TCG_TARGET_CALL_STACK_OFFSET    160
94
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND
95
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
96
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF
97
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
98
99
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
100
101
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
102
index XXXXXXX..XXXXXXX 100644
103
--- a/tcg/sparc64/tcg-target.h
104
+++ b/tcg/sparc64/tcg-target.h
105
@@ -XXX,XX +XXX,XX @@ typedef enum {
106
#define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS)
107
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND
108
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
109
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
110
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
111
112
#if defined(__VIS__) && __VIS__ >= 0x300
113
#define use_vis3_instructions 1
114
diff --git a/tcg/tcg.c b/tcg/tcg.c
115
index XXXXXXX..XXXXXXX 100644
116
--- a/tcg/tcg.c
117
+++ b/tcg/tcg.c
118
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
119
break;
120
case dh_typecode_i128:
121
info->nr_out = 128 / TCG_TARGET_REG_BITS;
122
- info->out_kind = TCG_CALL_RET_NORMAL; /* TODO */
123
- switch (/* TODO */ TCG_CALL_RET_NORMAL) {
124
+ info->out_kind = TCG_TARGET_CALL_RET_I128;
125
+ switch (TCG_TARGET_CALL_RET_I128) {
126
case TCG_CALL_RET_NORMAL:
127
/* Query the last register now to trigger any assert early. */
128
tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
129
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
130
break;
131
132
case TCG_TYPE_I128:
133
- switch (/* TODO */ TCG_CALL_ARG_NORMAL) {
134
+ switch (TCG_TARGET_CALL_ARG_I128) {
135
case TCG_CALL_ARG_EVEN:
136
layout_arg_even(&cum);
137
/* fall through */
138
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
139
index XXXXXXX..XXXXXXX 100644
140
--- a/tcg/ppc/tcg-target.c.inc
141
+++ b/tcg/ppc/tcg-target.c.inc
142
@@ -XXX,XX +XXX,XX @@
143
#else
144
# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
145
#endif
146
+/* Note sysv arg alignment applies only to 2-word types, not more. */
147
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
148
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
149
150
/* For some memory operations, we need a scratch that isn't R0. For the AIX
151
calling convention, we can re-use the TOC register since we'll be reloading
152
--
153
2.34.1
154
155
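A quick sketch of what the TCG_CALL_ARG_EVEN choice above means when laying
out argument registers (illustrative only, not the actual layout code):

    #include <stdbool.h>

    /*
     * ABIs such as 32-bit ARM's AAPCS require a two-register value to
     * start in an even-numbered register; ARG_EVEN skips the odd one.
     */
    static int align_arg_reg(int reg_idx, bool arg_even)
    {
        if (arg_even && (reg_idx & 1)) {
            reg_idx++;
        }
        return reg_idx;
    }
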
This enables allocation of i128. The type is not yet
usable, as we have not yet added data movement ops.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/tcg/tcg.h | 32 +++++++++++++++++++++++++
tcg/tcg.c | 60 +++++++++++++++++++++++++++++++++--------------
2 files changed, 74 insertions(+), 18 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ static inline TCGTemp *tcgv_i64_temp(TCGv_i64 v)
return tcgv_i32_temp((TCGv_i32)v);
}

+static inline TCGTemp *tcgv_i128_temp(TCGv_i128 v)
+{
+ return tcgv_i32_temp((TCGv_i32)v);
+}
+
static inline TCGTemp *tcgv_ptr_temp(TCGv_ptr v)
{
return tcgv_i32_temp((TCGv_i32)v);
@@ -XXX,XX +XXX,XX @@ static inline TCGArg tcgv_i64_arg(TCGv_i64 v)
return temp_arg(tcgv_i64_temp(v));
}

+static inline TCGArg tcgv_i128_arg(TCGv_i128 v)
+{
+ return temp_arg(tcgv_i128_temp(v));
+}
+
static inline TCGArg tcgv_ptr_arg(TCGv_ptr v)
{
return temp_arg(tcgv_ptr_temp(v));
@@ -XXX,XX +XXX,XX @@ static inline TCGv_i64 temp_tcgv_i64(TCGTemp *t)
return (TCGv_i64)temp_tcgv_i32(t);
}

+static inline TCGv_i128 temp_tcgv_i128(TCGTemp *t)
+{
+ return (TCGv_i128)temp_tcgv_i32(t);
+}
+
static inline TCGv_ptr temp_tcgv_ptr(TCGTemp *t)
{
return (TCGv_ptr)temp_tcgv_i32(t);
@@ -XXX,XX +XXX,XX @@ static inline void tcg_temp_free_i64(TCGv_i64 arg)
tcg_temp_free_internal(tcgv_i64_temp(arg));
}

+static inline void tcg_temp_free_i128(TCGv_i128 arg)
+{
+ tcg_temp_free_internal(tcgv_i128_temp(arg));
+}
+
static inline void tcg_temp_free_ptr(TCGv_ptr arg)
{
tcg_temp_free_internal(tcgv_ptr_temp(arg));
@@ -XXX,XX +XXX,XX @@ static inline TCGv_i64 tcg_temp_local_new_i64(void)
return temp_tcgv_i64(t);
}

+static inline TCGv_i128 tcg_temp_new_i128(void)
+{
+ TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I128, false);
+ return temp_tcgv_i128(t);
+}
+
+static inline TCGv_i128 tcg_temp_local_new_i128(void)
+{
+ TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I128, true);
+ return temp_tcgv_i128(t);
+}
+
static inline TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t offset,
const char *name)
{
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
tcg_debug_assert(ts->base_type == type);
tcg_debug_assert(ts->kind == kind);
} else {
+ int i, n;
+
+ switch (type) {
+ case TCG_TYPE_I32:
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ case TCG_TYPE_V256:
+ n = 1;
+ break;
+ case TCG_TYPE_I64:
+ n = 64 / TCG_TARGET_REG_BITS;
+ break;
+ case TCG_TYPE_I128:
+ n = 128 / TCG_TARGET_REG_BITS;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
ts = tcg_temp_alloc(s);
- if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
- TCGTemp *ts2 = tcg_temp_alloc(s);
+ ts->base_type = type;
+ ts->temp_allocated = 1;
+ ts->kind = kind;

- ts->base_type = type;
- ts->type = TCG_TYPE_I32;
- ts->temp_allocated = 1;
- ts->kind = kind;
-
- tcg_debug_assert(ts2 == ts + 1);
- ts2->base_type = TCG_TYPE_I64;
- ts2->type = TCG_TYPE_I32;
- ts2->temp_allocated = 1;
- ts2->temp_subindex = 1;
- ts2->kind = kind;
- } else {
- ts->base_type = type;
+ if (n == 1) {
ts->type = type;
- ts->temp_allocated = 1;
- ts->kind = kind;
+ } else {
+ ts->type = TCG_TYPE_REG;
+
+ for (i = 1; i < n; ++i) {
+ TCGTemp *ts2 = tcg_temp_alloc(s);
+
+ tcg_debug_assert(ts2 == ts + i);
+ ts2->base_type = type;
+ ts2->type = TCG_TYPE_REG;
+ ts2->temp_allocated = 1;
+ ts2->temp_subindex = i;
+ ts2->kind = kind;
+ }
}
}

@@ -XXX,XX +XXX,XX @@ static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
case TCG_TYPE_V64:
align = 8;
break;
+ case TCG_TYPE_I128:
case TCG_TYPE_V128:
case TCG_TYPE_V256:
- /* Note that we do not require aligned storage for V256. */
+ /*
+ * Note that we do not require aligned storage for V256,
+ * and that we provide alignment for I128 to match V128,
+ * even if that's above what the host ABI requires.
+ */
align = 16;
break;
default:
--
2.34.1

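The allocation above sizes an i128 temp in TCG_TARGET_REG_BITS-sized pieces;
the arithmetic, as a standalone sketch (subtemps_for is an illustrative
name):

    /*
     * Number of contiguous TCGTemps backing one value: an i128 takes
     * two subtemps on a 64-bit host and four on a 32-bit host, with
     * temp_subindex running 0..n-1.
     */
    static int subtemps_for(int type_bits, int reg_bits)
    {
        return type_bits <= reg_bits ? 1 : type_bits / reg_bits;
    }
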
Add code generation functions for data movement between
TCGv_i128 (mov) and to/from TCGv_i64 (concat, extract).

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/tcg/tcg-op.h | 4 ++++
tcg/tcg-internal.h | 13 +++++++++++++
tcg/tcg-op.c | 20 ++++++++++++++++++++
3 files changed, 37 insertions(+)

diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg);
void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg);
void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg);

+void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src);
+void tcg_gen_extr_i128_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i128 arg);
+void tcg_gen_concat_i64_i128(TCGv_i128 ret, TCGv_i64 lo, TCGv_i64 hi);
+
static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi)
{
tcg_gen_deposit_i64(ret, lo, hi, 32, 32);
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-internal.h
+++ b/tcg/tcg-internal.h
@@ -XXX,XX +XXX,XX @@ extern TCGv_i32 TCGV_LOW(TCGv_i64) QEMU_ERROR("32-bit code path is reachable");
extern TCGv_i32 TCGV_HIGH(TCGv_i64) QEMU_ERROR("32-bit code path is reachable");
#endif

+static inline TCGv_i64 TCGV128_LOW(TCGv_i128 t)
+{
+ /* For 32-bit, offset by 2, which may then have TCGV_{LOW,HIGH} applied. */
+ int o = HOST_BIG_ENDIAN ? 64 / TCG_TARGET_REG_BITS : 0;
+ return temp_tcgv_i64(tcgv_i128_temp(t) + o);
+}
+
+static inline TCGv_i64 TCGV128_HIGH(TCGv_i128 t)
+{
+ int o = HOST_BIG_ENDIAN ? 0 : 64 / TCG_TARGET_REG_BITS;
+ return temp_tcgv_i64(tcgv_i128_temp(t) + o);
+}
+
#endif /* TCG_INTERNAL_H */
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg)
tcg_gen_shri_i64(hi, arg, 32);
}

+void tcg_gen_extr_i128_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i128 arg)
+{
+ tcg_gen_mov_i64(lo, TCGV128_LOW(arg));
+ tcg_gen_mov_i64(hi, TCGV128_HIGH(arg));
+}
+
+void tcg_gen_concat_i64_i128(TCGv_i128 ret, TCGv_i64 lo, TCGv_i64 hi)
+{
+ tcg_gen_mov_i64(TCGV128_LOW(ret), lo);
+ tcg_gen_mov_i64(TCGV128_HIGH(ret), hi);
+}
+
+void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src)
+{
+ if (dst != src) {
+ tcg_gen_mov_i64(TCGV128_LOW(dst), TCGV128_LOW(src));
+ tcg_gen_mov_i64(TCGV128_HIGH(dst), TCGV128_HIGH(src));
+ }
+}
+
/* QEMU specific operations. */

void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx)
--
2.34.1

84
diff view generated by jsdifflib
New patch
1
These are not yet considering atomicity of the 16-byte value;
2
this is a direct replacement for the current target code which
3
uses a pair of 8-byte operations.
1
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
include/exec/cpu_ldst.h | 10 +++
9
include/tcg/tcg-op.h | 2 +
10
accel/tcg/cputlb.c | 112 +++++++++++++++++++++++++++++++++
11
accel/tcg/user-exec.c | 66 ++++++++++++++++++++
12
tcg/tcg-op.c | 134 ++++++++++++++++++++++++++++++++++++++++
13
5 files changed, 324 insertions(+)
14
15
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/exec/cpu_ldst.h
18
+++ b/include/exec/cpu_ldst.h
19
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr ptr,
20
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr ptr,
21
MemOpIdx oi, uintptr_t ra);
22
23
+Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
24
+ MemOpIdx oi, uintptr_t ra);
25
+Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
26
+ MemOpIdx oi, uintptr_t ra);
27
+
28
void cpu_stb_mmu(CPUArchState *env, abi_ptr ptr, uint8_t val,
29
MemOpIdx oi, uintptr_t ra);
30
void cpu_stw_be_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val,
31
@@ -XXX,XX +XXX,XX @@ void cpu_stl_le_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val,
32
void cpu_stq_le_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val,
33
MemOpIdx oi, uintptr_t ra);
34
35
+void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
36
+ MemOpIdx oi, uintptr_t ra);
37
+void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
38
+ MemOpIdx oi, uintptr_t ra);
39
+
40
uint32_t cpu_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr,
41
uint32_t cmpv, uint32_t newv,
42
MemOpIdx oi, uintptr_t retaddr);
43
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
44
index XXXXXXX..XXXXXXX 100644
45
--- a/include/tcg/tcg-op.h
46
+++ b/include/tcg/tcg-op.h
47
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32, TCGv, TCGArg, MemOp);
48
void tcg_gen_qemu_st_i32(TCGv_i32, TCGv, TCGArg, MemOp);
49
void tcg_gen_qemu_ld_i64(TCGv_i64, TCGv, TCGArg, MemOp);
50
void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, MemOp);
51
+void tcg_gen_qemu_ld_i128(TCGv_i128, TCGv, TCGArg, MemOp);
52
+void tcg_gen_qemu_st_i128(TCGv_i128, TCGv, TCGArg, MemOp);
53
54
static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index)
55
{
56
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/accel/tcg/cputlb.c
59
+++ b/accel/tcg/cputlb.c
60
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
61
return cpu_load_helper(env, addr, oi, ra, helper_le_ldq_mmu);
62
}
63
64
+Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
65
+ MemOpIdx oi, uintptr_t ra)
66
+{
67
+ MemOp mop = get_memop(oi);
68
+ int mmu_idx = get_mmuidx(oi);
69
+ MemOpIdx new_oi;
70
+ unsigned a_bits;
71
+ uint64_t h, l;
72
+
73
+ tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
74
+ a_bits = get_alignment_bits(mop);
75
+
76
+ /* Handle CPU specific unaligned behaviour */
77
+ if (addr & ((1 << a_bits) - 1)) {
78
+ cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
79
+ mmu_idx, ra);
80
+ }
81
+
82
+ /* Construct an unaligned 64-bit replacement MemOpIdx. */
83
+ mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
84
+ new_oi = make_memop_idx(mop, mmu_idx);
85
+
86
+ h = helper_be_ldq_mmu(env, addr, new_oi, ra);
87
+ l = helper_be_ldq_mmu(env, addr + 8, new_oi, ra);
88
+
89
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
90
+ return int128_make128(l, h);
91
+}
92
+
93
+Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
94
+ MemOpIdx oi, uintptr_t ra)
95
+{
96
+ MemOp mop = get_memop(oi);
97
+ int mmu_idx = get_mmuidx(oi);
98
+ MemOpIdx new_oi;
99
+ unsigned a_bits;
100
+ uint64_t h, l;
101
+
102
+ tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
103
+ a_bits = get_alignment_bits(mop);
104
+
105
+ /* Handle CPU specific unaligned behaviour */
106
+ if (addr & ((1 << a_bits) - 1)) {
107
+ cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
108
+ mmu_idx, ra);
109
+ }
110
+
111
+ /* Construct an unaligned 64-bit replacement MemOpIdx. */
112
+ mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
113
+ new_oi = make_memop_idx(mop, mmu_idx);
114
+
115
+ l = helper_le_ldq_mmu(env, addr, new_oi, ra);
116
+ h = helper_le_ldq_mmu(env, addr + 8, new_oi, ra);
117
+
118
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
119
+ return int128_make128(l, h);
120
+}
121
+
122
/*
123
* Store Helpers
124
*/
125
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
126
cpu_store_helper(env, addr, val, oi, retaddr, helper_le_stq_mmu);
127
}
128
129
+void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
130
+ MemOpIdx oi, uintptr_t ra)
131
+{
132
+ MemOp mop = get_memop(oi);
133
+ int mmu_idx = get_mmuidx(oi);
134
+ MemOpIdx new_oi;
135
+ unsigned a_bits;
136
+
137
+ tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
138
+ a_bits = get_alignment_bits(mop);
139
+
140
+ /* Handle CPU specific unaligned behaviour */
141
+ if (addr & ((1 << a_bits) - 1)) {
142
+ cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
143
+ mmu_idx, ra);
144
+ }
145
+
146
+ /* Construct an unaligned 64-bit replacement MemOpIdx. */
147
+ mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
148
+ new_oi = make_memop_idx(mop, mmu_idx);
149
+
150
+ helper_be_stq_mmu(env, addr, int128_gethi(val), new_oi, ra);
151
+ helper_be_stq_mmu(env, addr + 8, int128_getlo(val), new_oi, ra);
152
+
153
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
154
+}
155
+
156
+void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
157
+ MemOpIdx oi, uintptr_t ra)
158
+{
159
+ MemOp mop = get_memop(oi);
160
+ int mmu_idx = get_mmuidx(oi);
161
+ MemOpIdx new_oi;
162
+ unsigned a_bits;
163
+
164
+ tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
165
+ a_bits = get_alignment_bits(mop);
166
+
167
+ /* Handle CPU specific unaligned behaviour */
168
+ if (addr & ((1 << a_bits) - 1)) {
169
+ cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
170
+ mmu_idx, ra);
171
+ }
172
+
173
+ /* Construct an unaligned 64-bit replacement MemOpIdx. */
174
+ mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
175
+ new_oi = make_memop_idx(mop, mmu_idx);
176
+
177
+ helper_le_stq_mmu(env, addr, int128_getlo(val), new_oi, ra);
178
+ helper_le_stq_mmu(env, addr + 8, int128_gethi(val), new_oi, ra);
179
+
180
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
181
+}
182
+
183
#include "ldst_common.c.inc"
184
185
/*
186
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
187
index XXXXXXX..XXXXXXX 100644
188
--- a/accel/tcg/user-exec.c
189
+++ b/accel/tcg/user-exec.c
190
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
191
return ret;
192
}
193
194
+Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
195
+ MemOpIdx oi, uintptr_t ra)
196
+{
197
+ void *haddr;
198
+ Int128 ret;
199
+
200
+ validate_memop(oi, MO_128 | MO_BE);
201
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
202
+ memcpy(&ret, haddr, 16);
203
+ clear_helper_retaddr();
204
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
205
+
206
+ if (!HOST_BIG_ENDIAN) {
207
+ ret = bswap128(ret);
208
+ }
209
+ return ret;
210
+}
211
+
212
+Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
213
+ MemOpIdx oi, uintptr_t ra)
214
+{
215
+ void *haddr;
216
+ Int128 ret;
217
+
218
+ validate_memop(oi, MO_128 | MO_LE);
219
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
220
+ memcpy(&ret, haddr, 16);
221
+ clear_helper_retaddr();
222
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
223
+
224
+ if (HOST_BIG_ENDIAN) {
225
+ ret = bswap128(ret);
226
+ }
227
+ return ret;
228
+}
229
+
230
void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
231
MemOpIdx oi, uintptr_t ra)
232
{
233
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
234
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
235
}
236
237
+void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
238
+ Int128 val, MemOpIdx oi, uintptr_t ra)
239
+{
240
+ void *haddr;
241
+
242
+ validate_memop(oi, MO_128 | MO_BE);
243
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
244
+ if (!HOST_BIG_ENDIAN) {
245
+ val = bswap128(val);
246
+ }
247
+ memcpy(haddr, &val, 16);
248
+ clear_helper_retaddr();
249
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
250
+}
251
+
252
+void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr,
253
+ Int128 val, MemOpIdx oi, uintptr_t ra)
254
+{
255
+ void *haddr;
256
+
257
+ validate_memop(oi, MO_128 | MO_LE);
258
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
259
+ if (HOST_BIG_ENDIAN) {
260
+ val = bswap128(val);
261
+ }
262
+ memcpy(haddr, &val, 16);
263
+ clear_helper_retaddr();
264
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
265
+}
266
+
267
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr ptr)
268
{
269
uint32_t ret;
270
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
271
index XXXXXXX..XXXXXXX 100644
272
--- a/tcg/tcg-op.c
273
+++ b/tcg/tcg-op.c
274
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
275
}
276
}
277
278
+static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
279
+{
280
+ MemOp mop_1 = orig, mop_2;
281
+
282
+ tcg_debug_assert((orig & MO_SIZE) == MO_128);
283
+ tcg_debug_assert((orig & MO_SIGN) == 0);
284
+
285
+ /* Use a memory ordering implemented by the host. */
286
+ if (!TCG_TARGET_HAS_MEMORY_BSWAP && (orig & MO_BSWAP)) {
287
+ mop_1 &= ~MO_BSWAP;
288
+ }
289
+
290
+ /* Reduce the size to 64-bit. */
291
+ mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
292
+
293
+ /* Retain the alignment constraints of the original. */
294
+ switch (orig & MO_AMASK) {
295
+ case MO_UNALN:
296
+ case MO_ALIGN_2:
297
+ case MO_ALIGN_4:
298
+ mop_2 = mop_1;
299
+ break;
300
+ case MO_ALIGN_8:
301
+ /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
302
+ mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
303
+ mop_2 = mop_1;
304
+ break;
305
+ case MO_ALIGN:
306
+ /* Second has 8-byte alignment; first has 16-byte alignment. */
307
+ mop_2 = mop_1;
308
+ mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
309
+ break;
310
+ case MO_ALIGN_16:
311
+ case MO_ALIGN_32:
312
+ case MO_ALIGN_64:
313
+ /* Second has 8-byte alignment; first retains original. */
314
+ mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
315
+ break;
316
+ default:
317
+ g_assert_not_reached();
318
+ }
319
+ ret[0] = mop_1;
320
+ ret[1] = mop_2;
321
+}
322
+
323
+void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
324
+{
325
+ MemOp mop[2];
326
+ TCGv addr_p8;
327
+ TCGv_i64 x, y;
328
+
329
+ canonicalize_memop_i128_as_i64(mop, memop);
330
+
331
+ tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
332
+ addr = plugin_prep_mem_callbacks(addr);
333
+
334
+ /* TODO: respect atomicity of the operation. */
335
+ /* TODO: allow the tcg backend to see the whole operation. */
336
+
337
+ /*
338
+ * Since there are no global TCGv_i128, there is no visible state
339
+ * changed if the second load faults. Load directly into the two
340
+ * subwords.
341
+ */
342
+ if ((memop & MO_BSWAP) == MO_LE) {
343
+ x = TCGV128_LOW(val);
344
+ y = TCGV128_HIGH(val);
345
+ } else {
346
+ x = TCGV128_HIGH(val);
347
+ y = TCGV128_LOW(val);
348
+ }
349
+
350
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
351
+
352
+ if ((mop[0] ^ memop) & MO_BSWAP) {
353
+ tcg_gen_bswap64_i64(x, x);
354
+ }
355
+
356
+ addr_p8 = tcg_temp_new();
357
+ tcg_gen_addi_tl(addr_p8, addr, 8);
358
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
359
+ tcg_temp_free(addr_p8);
360
+
361
+ if ((mop[0] ^ memop) & MO_BSWAP) {
362
+ tcg_gen_bswap64_i64(y, y);
363
+ }
364
+
365
+ plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
366
+ QEMU_PLUGIN_MEM_R);
367
+}
368
+
369
+void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
370
+{
371
+ MemOp mop[2];
372
+ TCGv addr_p8;
373
+ TCGv_i64 x, y;
374
+
375
+ canonicalize_memop_i128_as_i64(mop, memop);
376
+
377
+ tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
378
+ addr = plugin_prep_mem_callbacks(addr);
379
+
380
+ /* TODO: respect atomicity of the operation. */
381
+ /* TODO: allow the tcg backend to see the whole operation. */
382
+
383
+ if ((memop & MO_BSWAP) == MO_LE) {
384
+ x = TCGV128_LOW(val);
385
+ y = TCGV128_HIGH(val);
386
+ } else {
387
+ x = TCGV128_HIGH(val);
388
+ y = TCGV128_LOW(val);
389
+ }
390
+
391
+ addr_p8 = tcg_temp_new();
392
+ if ((mop[0] ^ memop) & MO_BSWAP) {
393
+ TCGv_i64 t = tcg_temp_new_i64();
394
+
395
+ tcg_gen_bswap64_i64(t, x);
396
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
397
+ tcg_gen_bswap64_i64(t, y);
398
+ tcg_gen_addi_tl(addr_p8, addr, 8);
399
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
400
+ tcg_temp_free_i64(t);
401
+ } else {
402
+ gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
403
+ tcg_gen_addi_tl(addr_p8, addr, 8);
404
+ gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
405
+ }
406
+ tcg_temp_free(addr_p8);
407
+
408
+ plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
409
+ QEMU_PLUGIN_MEM_W);
410
+}
411
+
412
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
413
{
414
switch (opc & MO_SSIZE) {
415
--
416
2.34.1
417
418
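Usage from a target front end might look like this (hypothetical fragment;
gen_copy16 is illustrative only):

    #include "tcg/tcg-op.h"

    /* Copy an aligned 16-byte little-endian value from src to dst. */
    static void gen_copy16(TCGv dst_addr, TCGv src_addr, int mem_idx)
    {
        TCGv_i128 t = tcg_temp_new_i128();

        tcg_gen_qemu_ld_i128(t, src_addr, mem_idx,
                             MO_LE | MO_128 | MO_ALIGN);
        tcg_gen_qemu_st_i128(t, dst_addr, mem_idx,
                             MO_LE | MO_128 | MO_ALIGN);
        tcg_temp_free_i128(t);
    }
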
This will allow targets to avoid rolling their own.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/tcg-runtime.h | 11 +++++
include/tcg/tcg-op.h | 5 +++
tcg/tcg-op.c | 85 +++++++++++++++++++++++++++++++++++
accel/tcg/atomic_common.c.inc | 45 +++++++++++++++++++
4 files changed, 146 insertions(+)

diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(atomic_cmpxchgq_be, TCG_CALL_NO_WG,
DEF_HELPER_FLAGS_5(atomic_cmpxchgq_le, TCG_CALL_NO_WG,
i64, env, tl, i64, i64, i32)
#endif
+#ifdef CONFIG_CMPXCHG128
+DEF_HELPER_FLAGS_5(atomic_cmpxchgo_be, TCG_CALL_NO_WG,
+ i128, env, tl, i128, i128, i32)
+DEF_HELPER_FLAGS_5(atomic_cmpxchgo_le, TCG_CALL_NO_WG,
+ i128, env, tl, i128, i128, i32)
+#endif
+
+DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_be, TCG_CALL_NO_WG,
+ i128, env, tl, i128, i128, i32)
+DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_le, TCG_CALL_NO_WG,
+ i128, env, tl, i128, i128, i32)

#ifdef CONFIG_ATOMIC64
#define GEN_ATOMIC_HELPERS(NAME) \
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
TCGArg, MemOp);
void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
TCGArg, MemOp);
+void tcg_gen_atomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
+ TCGArg, MemOp);
+
+void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
+ TCGArg, MemOp);

void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@ typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
TCGv_i32, TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
TCGv_i64, TCGv_i64, TCGv_i32);
+typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv,
+ TCGv_i128, TCGv_i128, TCGv_i32);
typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
@@ -XXX,XX +XXX,XX @@ typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
#else
# define WITH_ATOMIC64(X)
#endif
+#ifdef CONFIG_CMPXCHG128
+# define WITH_ATOMIC128(X) X,
+#else
+# define WITH_ATOMIC128(X)
+#endif

static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
[MO_8] = gen_helper_atomic_cmpxchgb,
@@ -XXX,XX +XXX,XX @@ static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
[MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
+ WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
+ WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
};

void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
}
}

+void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
+ TCGv_i128 newv, TCGArg idx, MemOp memop)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ /* Inline expansion below is simply too large for 32-bit hosts. */
+ gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
+ ? gen_helper_nonatomic_cmpxchgo_le
+ : gen_helper_nonatomic_cmpxchgo_be);
+ MemOpIdx oi = make_memop_idx(memop, idx);
+
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
+ tcg_debug_assert((memop & MO_SIGN) == 0);
+
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
+ } else {
+ TCGv_i128 oldv = tcg_temp_new_i128();
+ TCGv_i128 tmpv = tcg_temp_new_i128();
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 z = tcg_constant_i64(0);
+
+ tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
+
+ /* Compare i128 */
+ tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
+ tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
+ tcg_gen_or_i64(t0, t0, t1);
+
+ /* tmpv = equal ? newv : oldv */
+ tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
+ TCGV128_LOW(newv), TCGV128_LOW(oldv));
+ tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
+ TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
+
+ /* Unconditional writeback. */
+ tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
+ tcg_gen_mov_i128(retv, oldv);
+
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i128(tmpv);
+ tcg_temp_free_i128(oldv);
+ }
+}
+
+void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
+ TCGv_i128 newv, TCGArg idx, MemOp memop)
+{
+ gen_atomic_cx_i128 gen;
+
+ if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
+ tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
+ return;
+ }
+
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
+ tcg_debug_assert((memop & MO_SIGN) == 0);
+ gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
+
+ if (gen) {
+ MemOpIdx oi = make_memop_idx(memop, idx);
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
+ return;
+ }
+
+ gen_helper_exit_atomic(cpu_env);
+
+ /*
+ * Produce a result for a well-formed opcode stream. This satisfies
+ * liveness for set before used, which happens before this dead code
+ * is removed.
+ */
+ tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
+ tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
+}
+
static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
TCGArg idx, MemOp memop, bool new_val,
void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/atomic_common.c.inc
+++ b/accel/tcg/atomic_common.c.inc
@@ -XXX,XX +XXX,XX @@ CMPXCHG_HELPER(cmpxchgq_be, uint64_t)
CMPXCHG_HELPER(cmpxchgq_le, uint64_t)
#endif

+#ifdef CONFIG_CMPXCHG128
+CMPXCHG_HELPER(cmpxchgo_be, Int128)
+CMPXCHG_HELPER(cmpxchgo_le, Int128)
+#endif
+
#undef CMPXCHG_HELPER

+Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, target_ulong addr,
+ Int128 cmpv, Int128 newv, uint32_t oi)
+{
+#if TCG_TARGET_REG_BITS == 32
+ uintptr_t ra = GETPC();
+ Int128 oldv;
+
+ oldv = cpu_ld16_be_mmu(env, addr, oi, ra);
+ if (int128_eq(oldv, cmpv)) {
+ cpu_st16_be_mmu(env, addr, newv, oi, ra);
+ } else {
+ /* Even with comparison failure, still need a write cycle. */
+ probe_write(env, addr, 16, get_mmuidx(oi), ra);
+ }
+ return oldv;
+#else
+ g_assert_not_reached();
+#endif
+}
+
+Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, target_ulong addr,
+ Int128 cmpv, Int128 newv, uint32_t oi)
+{
+#if TCG_TARGET_REG_BITS == 32
+ uintptr_t ra = GETPC();
+ Int128 oldv;
+
+ oldv = cpu_ld16_le_mmu(env, addr, oi, ra);
+ if (int128_eq(oldv, cmpv)) {
+ cpu_st16_le_mmu(env, addr, newv, oi, ra);
+ } else {
+ /* Even with comparison failure, still need a write cycle. */
+ probe_write(env, addr, 16, get_mmuidx(oi), ra);
+ }
+ return oldv;
+#else
+ g_assert_not_reached();
+#endif
+}
+
#define ATOMIC_HELPER(OP, TYPE) \
TYPE HELPER(glue(atomic_,OP))(CPUArchState *env, target_ulong addr, \
TYPE val, uint32_t oi) \
--
2.34.1

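A sketch of the intended use from a target front end (hypothetical fragment;
gen_cas16 is illustrative, with the i128 value assembled via the movement
ops from earlier in the series):

    #include "tcg/tcg-op.h"

    static void gen_cas16(TCGv addr, TCGv_i64 cmp_lo, TCGv_i64 cmp_hi,
                          TCGv_i64 new_lo, TCGv_i64 new_hi, int mem_idx)
    {
        TCGv_i128 cmpv = tcg_temp_new_i128();
        TCGv_i128 newv = tcg_temp_new_i128();
        TCGv_i128 oldv = tcg_temp_new_i128();

        tcg_gen_concat_i64_i128(cmpv, cmp_lo, cmp_hi);
        tcg_gen_concat_i64_i128(newv, new_lo, new_hi);

        /* Falls back to the non-atomic variant when !CF_PARALLEL. */
        tcg_gen_atomic_cmpxchg_i128(oldv, addr, cmpv, newv, mem_idx,
                                    MO_LE | MO_128 | MO_ALIGN);

        /* Return the old value through the cmp registers. */
        tcg_gen_extr_i128_i64(cmp_lo, cmp_hi, oldv);

        tcg_temp_free_i128(cmpv);
        tcg_temp_free_i128(newv);
        tcg_temp_free_i128(oldv);
    }
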
Normally this is automatically handled by the CF_PARALLEL checks
within tcg_gen_atomic_cmpxchg_i{32,64}, but x86 has a special
case of !PREFIX_LOCK where it always wants the non-atomic version.

Split these out so that x86 does not have to roll its own.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/tcg/tcg-op.h | 4 ++
tcg/tcg-op.c | 154 +++++++++++++++++++++++++++----------------
2 files changed, 101 insertions(+), 57 deletions(-)

diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
void tcg_gen_atomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
TCGArg, MemOp);

+void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
+ TCGArg, MemOp);
+void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
+ TCGArg, MemOp);
void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
TCGArg, MemOp);

void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@ static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
};

+void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
+ TCGv_i32 newv, TCGArg idx, MemOp memop)
+{
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+
+ tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
+
+ tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
+ tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
+ tcg_gen_qemu_st_i32(t2, addr, idx, memop);
+ tcg_temp_free_i32(t2);
+
+ if (memop & MO_SIGN) {
+ tcg_gen_ext_i32(retv, t1, memop);
+ } else {
+ tcg_gen_mov_i32(retv, t1);
+ }
+ tcg_temp_free_i32(t1);
+}
+
void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
TCGv_i32 newv, TCGArg idx, MemOp memop)
{
- memop = tcg_canonicalize_memop(memop, 0, 0);
+ gen_atomic_cx_i32 gen;
+ MemOpIdx oi;

if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
- TCGv_i32 t1 = tcg_temp_new_i32();
- TCGv_i32 t2 = tcg_temp_new_i32();
-
- tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
-
- tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
- tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
- tcg_gen_qemu_st_i32(t2, addr, idx, memop);
- tcg_temp_free_i32(t2);
-
- if (memop & MO_SIGN) {
- tcg_gen_ext_i32(retv, t1, memop);
- } else {
- tcg_gen_mov_i32(retv, t1);
- }
- tcg_temp_free_i32(t1);
- } else {
- gen_atomic_cx_i32 gen;
- MemOpIdx oi;
-
- gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
- tcg_debug_assert(gen != NULL);
-
- oi = make_memop_idx(memop & ~MO_SIGN, idx);
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
-
- if (memop & MO_SIGN) {
- tcg_gen_ext_i32(retv, retv, memop);
- }
+ tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
+ return;
}
+
+ memop = tcg_canonicalize_memop(memop, 0, 0);
+ gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
+ tcg_debug_assert(gen != NULL);
+
+ oi = make_memop_idx(memop & ~MO_SIGN, idx);
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
+
+ if (memop & MO_SIGN) {
+ tcg_gen_ext_i32(retv, retv, memop);
+ }
+}
+
+void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
+ TCGv_i64 newv, TCGArg idx, MemOp memop)
+{
+ TCGv_i64 t1, t2;
+
+ if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
+ tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
+ TCGV_LOW(newv), idx, memop);
+ if (memop & MO_SIGN) {
+ tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
+ } else {
+ tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
+ }
+ return;
+ }
+
+ t1 = tcg_temp_new_i64();
+ t2 = tcg_temp_new_i64();
+
+ tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
+
+ tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
+ tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
+ tcg_gen_qemu_st_i64(t2, addr, idx, memop);
+ tcg_temp_free_i64(t2);
+
+ if (memop & MO_SIGN) {
+ tcg_gen_ext_i64(retv, t1, memop);
+ } else {
+ tcg_gen_mov_i64(retv, t1);
+ }
+ tcg_temp_free_i64(t1);
}

void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
TCGv_i64 newv, TCGArg idx, MemOp memop)
{
- memop = tcg_canonicalize_memop(memop, 1, 0);
-
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
- TCGv_i64 t1 = tcg_temp_new_i64();
- TCGv_i64 t2 = tcg_temp_new_i64();
+ tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
+ return;
+ }

- tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
-
- tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
- tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
- tcg_gen_qemu_st_i64(t2, addr, idx, memop);
- tcg_temp_free_i64(t2);
-
- if (memop & MO_SIGN) {
- tcg_gen_ext_i64(retv, t1, memop);
- } else {
- tcg_gen_mov_i64(retv, t1);
- }
- tcg_temp_free_i64(t1);
- } else if ((memop & MO_SIZE) == MO_64) {
-#ifdef CONFIG_ATOMIC64
+ if ((memop & MO_SIZE) == MO_64) {
gen_atomic_cx_i64 gen;
- MemOpIdx oi;

+ memop = tcg_canonicalize_memop(memop, 1, 0);
gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
- tcg_debug_assert(gen != NULL);
+ if (gen) {
+ MemOpIdx oi = make_memop_idx(memop, idx);
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
+ return;
+ }

- oi = make_memop_idx(memop, idx);
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
-#else
gen_helper_exit_atomic(cpu_env);
- /* Produce a result, so that we have a well-formed opcode stream
- with respect to uses of the result in the (dead) code following. */
+
+ /*
+ * Produce a result for a well-formed opcode stream. This satisfies
+ * liveness for set before used, which happens before this dead code
+ * is removed.
+ */
tcg_gen_movi_i64(retv, 0);
-#endif /* CONFIG_ATOMIC64 */
+ return;
+ }
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
+ TCGV_LOW(newv), idx, memop);
+ if (memop & MO_SIGN) {
+ tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
+ } else {
+ tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
+ }
} else {
TCGv_i32 c32 = tcg_temp_new_i32();
TCGv_i32 n32 = tcg_temp_new_i32();
--
2.34.1

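The x86 use case then reduces to calling the non-atomic variant directly
(hypothetical fragment; gen_cmpxchg32_unlocked is illustrative only):

    #include "tcg/tcg-op.h"

    /*
     * Without a LOCK prefix, x86 wants the plain load/compare/store
     * sequence regardless of CF_PARALLEL.
     */
    static void gen_cmpxchg32_unlocked(TCGv_i32 oldv, TCGv addr,
                                       TCGv_i32 cmpv, TCGv_i32 newv,
                                       int mem_idx)
    {
        tcg_gen_nonatomic_cmpxchg_i32(oldv, addr, cmpv, newv,
                                      mem_idx, MO_LE | MO_UL);
    }
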
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-Id: <20221112042555.2622152-2-richard.henderson@linaro.org>
---
target/arm/helper-a64.h | 6 ---
target/arm/helper-a64.c | 104 -------------------------------------
target/arm/translate-a64.c | 60 ++++++++++++---------
3 files changed, 35 insertions(+), 135 deletions(-)

diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env)
DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
-DEF_HELPER_FLAGS_4(paired_cmpxchg64_le, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
-DEF_HELPER_FLAGS_4(paired_cmpxchg64_le_parallel, TCG_CALL_NO_WG,
- i64, env, i64, i64, i64)
-DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
-DEF_HELPER_FLAGS_4(paired_cmpxchg64_be_parallel, TCG_CALL_NO_WG,
- i64, env, i64, i64, i64)
DEF_HELPER_5(casp_le_parallel, void, env, i32, i64, i64, i64)
DEF_HELPER_5(casp_be_parallel, void, env, i32, i64, i64, i64)
DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
return crc32c(acc, buf, bytes) ^ 0xffffffff;
}

-uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
- uint64_t new_lo, uint64_t new_hi)
-{
- Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
- Int128 newv = int128_make128(new_lo, new_hi);
- Int128 oldv;
- uintptr_t ra = GETPC();
- uint64_t o0, o1;
- bool success;
- int mem_idx = cpu_mmu_index(env, false);
- MemOpIdx oi0 = make_memop_idx(MO_LEUQ | MO_ALIGN_16, mem_idx);
- MemOpIdx oi1 = make_memop_idx(MO_LEUQ, mem_idx);
-
- o0 = cpu_ldq_le_mmu(env, addr + 0, oi0, ra);
- o1 = cpu_ldq_le_mmu(env, addr + 8, oi1, ra);
- oldv = int128_make128(o0, o1);
-
- success = int128_eq(oldv, cmpv);
- if (success) {
- cpu_stq_le_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
- cpu_stq_le_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
- }
-
- return !success;
-}
-
-uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
- uint64_t new_lo, uint64_t new_hi)
-{
- Int128 oldv, cmpv, newv;
- uintptr_t ra = GETPC();
- bool success;
- int mem_idx;
- MemOpIdx oi;
-
- assert(HAVE_CMPXCHG128);
-
- mem_idx = cpu_mmu_index(env, false);
- oi = make_memop_idx(MO_LE | MO_128 | MO_ALIGN, mem_idx);
-
- cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
- newv = int128_make128(new_lo, new_hi);
- oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
-
- success = int128_eq(oldv, cmpv);
- return !success;
-}
-
-uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
- uint64_t new_lo, uint64_t new_hi)
-{
- /*
- * High and low need to be switched here because this is not actually a
- * 128bit store but two doublewords stored consecutively
- */
- Int128 cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
- Int128 newv = int128_make128(new_hi, new_lo);
- Int128 oldv;
- uintptr_t ra = GETPC();
- uint64_t o0, o1;
- bool success;
- int mem_idx = cpu_mmu_index(env, false);
- MemOpIdx oi0 = make_memop_idx(MO_BEUQ | MO_ALIGN_16, mem_idx);
- MemOpIdx oi1 = make_memop_idx(MO_BEUQ, mem_idx);
-
- o1 = cpu_ldq_be_mmu(env, addr + 0, oi0, ra);
- o0 = cpu_ldq_be_mmu(env, addr + 8, oi1, ra);
- oldv = int128_make128(o0, o1);
-
- success = int128_eq(oldv, cmpv);
- if (success) {
- cpu_stq_be_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
- cpu_stq_be_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
- }
-
- return !success;
-}
-
-uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
- uint64_t new_lo, uint64_t new_hi)
-{
- Int128 oldv, cmpv, newv;
- uintptr_t ra = GETPC();
- bool success;
- int mem_idx;
- MemOpIdx oi;
-
- assert(HAVE_CMPXCHG128);
-
- mem_idx = cpu_mmu_index(env, false);
- oi = make_memop_idx(MO_BE | MO_128 | MO_ALIGN, mem_idx);
-
- /*
- * High and low need to be switched here because this is not actually a
- * 128bit store but two doublewords stored consecutively
- */
- cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
- newv = int128_make128(new_hi, new_lo);
- oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
-
- success = int128_eq(oldv, cmpv);
- return !success;
-}
-
/* Writes back the old data into Rs. */
void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
uint64_t new_lo, uint64_t new_hi)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
get_mem_index(s),
MO_64 | MO_ALIGN | s->be_data);
tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
- } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
- if (!HAVE_CMPXCHG128) {
- gen_helper_exit_atomic(cpu_env);
- /*
- * Produce a result so we have a well-formed opcode
- * stream when the following (dead) code uses 'tmp'.
- * TCG will remove the dead ops for us.
- */
- tcg_gen_movi_i64(tmp, 0);
- } else if (s->be_data == MO_LE) {
- gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
- cpu_exclusive_addr,
- cpu_reg(s, rt),
- cpu_reg(s, rt2));
- } else {
- gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
166
- cpu_exclusive_addr,
167
- cpu_reg(s, rt),
168
- cpu_reg(s, rt2));
169
- }
170
- } else if (s->be_data == MO_LE) {
171
- gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
172
- cpu_reg(s, rt), cpu_reg(s, rt2));
173
} else {
174
- gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
175
- cpu_reg(s, rt), cpu_reg(s, rt2));
176
+ TCGv_i128 t16 = tcg_temp_new_i128();
177
+ TCGv_i128 c16 = tcg_temp_new_i128();
178
+ TCGv_i64 a, b;
179
+
180
+ if (s->be_data == MO_LE) {
181
+ tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
182
+ tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
183
+ cpu_exclusive_high);
184
+ } else {
185
+ tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
186
+ tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
187
+ cpu_exclusive_val);
188
+ }
189
+
190
+ tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
191
+ get_mem_index(s),
192
+ MO_128 | MO_ALIGN | s->be_data);
193
+ tcg_temp_free_i128(c16);
194
+
195
+ a = tcg_temp_new_i64();
196
+ b = tcg_temp_new_i64();
197
+ if (s->be_data == MO_LE) {
198
+ tcg_gen_extr_i128_i64(a, b, t16);
199
+ } else {
200
+ tcg_gen_extr_i128_i64(b, a, t16);
201
+ }
202
+
203
+ tcg_gen_xor_i64(a, a, cpu_exclusive_val);
204
+ tcg_gen_xor_i64(b, b, cpu_exclusive_high);
205
+ tcg_gen_or_i64(tmp, a, b);
206
+ tcg_temp_free_i64(a);
207
+ tcg_temp_free_i64(b);
208
+ tcg_temp_free_i128(t16);
209
+
210
+ tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
211
}
212
} else {
213
tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
214
--
215
2.34.1
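The success check in the new STXP path folds the 128-bit comparison into two xors, an or, and a setcond. The same trick in plain C, as a sketch with illustrative names:

    #include <stdint.h>

    /* Nonzero iff the loaded pair differs from the expected pair,
       matching the xor/xor/or/setcond sequence in the patch. */
    static int store_failed(uint64_t lo, uint64_t hi,
                            uint64_t expect_lo, uint64_t expect_hi)
    {
        return ((lo ^ expect_lo) | (hi ^ expect_hi)) != 0;
    }

This avoids materializing a 128-bit compare on the host: a mismatch in either half leaves a nonzero bit after the or.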
New patch
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Message-Id: <20221112042555.2622152-3-richard.henderson@linaro.org>
4
---
5
target/arm/helper-a64.h | 2 --
6
target/arm/helper-a64.c | 43 ---------------------------
7
target/arm/translate-a64.c | 61 +++++++++++---------------------------
8
3 files changed, 18 insertions(+), 88 deletions(-)
1
9
10
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/arm/helper-a64.h
13
+++ b/target/arm/helper-a64.h
14
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
15
DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env)
16
DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
17
DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
18
-DEF_HELPER_5(casp_le_parallel, void, env, i32, i64, i64, i64)
19
-DEF_HELPER_5(casp_be_parallel, void, env, i32, i64, i64, i64)
20
DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
21
DEF_HELPER_FLAGS_3(advsimd_minh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
22
DEF_HELPER_FLAGS_3(advsimd_maxnumh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
23
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/target/arm/helper-a64.c
26
+++ b/target/arm/helper-a64.c
27
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
28
return crc32c(acc, buf, bytes) ^ 0xffffffff;
29
}
30
31
-/* Writes back the old data into Rs. */
32
-void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
33
- uint64_t new_lo, uint64_t new_hi)
34
-{
35
- Int128 oldv, cmpv, newv;
36
- uintptr_t ra = GETPC();
37
- int mem_idx;
38
- MemOpIdx oi;
39
-
40
- assert(HAVE_CMPXCHG128);
41
-
42
- mem_idx = cpu_mmu_index(env, false);
43
- oi = make_memop_idx(MO_LE | MO_128 | MO_ALIGN, mem_idx);
44
-
45
- cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
46
- newv = int128_make128(new_lo, new_hi);
47
- oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
48
-
49
- env->xregs[rs] = int128_getlo(oldv);
50
- env->xregs[rs + 1] = int128_gethi(oldv);
51
-}
52
-
53
-void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
54
- uint64_t new_hi, uint64_t new_lo)
55
-{
56
- Int128 oldv, cmpv, newv;
57
- uintptr_t ra = GETPC();
58
- int mem_idx;
59
- MemOpIdx oi;
60
-
61
- assert(HAVE_CMPXCHG128);
62
-
63
- mem_idx = cpu_mmu_index(env, false);
64
- oi = make_memop_idx(MO_LE | MO_128 | MO_ALIGN, mem_idx);
65
-
66
- cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
67
- newv = int128_make128(new_lo, new_hi);
68
- oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
69
-
70
- env->xregs[rs + 1] = int128_getlo(oldv);
71
- env->xregs[rs] = int128_gethi(oldv);
72
-}
73
-
74
/*
75
* AdvSIMD half-precision
76
*/
77
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
78
index XXXXXXX..XXXXXXX 100644
79
--- a/target/arm/translate-a64.c
80
+++ b/target/arm/translate-a64.c
81
@@ -XXX,XX +XXX,XX @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
82
tcg_gen_extr32_i64(s2, s1, cmp);
83
}
84
tcg_temp_free_i64(cmp);
85
- } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
86
- if (HAVE_CMPXCHG128) {
87
- TCGv_i32 tcg_rs = tcg_constant_i32(rs);
88
- if (s->be_data == MO_LE) {
89
- gen_helper_casp_le_parallel(cpu_env, tcg_rs,
90
- clean_addr, t1, t2);
91
- } else {
92
- gen_helper_casp_be_parallel(cpu_env, tcg_rs,
93
- clean_addr, t1, t2);
94
- }
95
- } else {
96
- gen_helper_exit_atomic(cpu_env);
97
- s->base.is_jmp = DISAS_NORETURN;
98
- }
99
} else {
100
- TCGv_i64 d1 = tcg_temp_new_i64();
101
- TCGv_i64 d2 = tcg_temp_new_i64();
102
- TCGv_i64 a2 = tcg_temp_new_i64();
103
- TCGv_i64 c1 = tcg_temp_new_i64();
104
- TCGv_i64 c2 = tcg_temp_new_i64();
105
- TCGv_i64 zero = tcg_constant_i64(0);
106
+ TCGv_i128 cmp = tcg_temp_new_i128();
107
+ TCGv_i128 val = tcg_temp_new_i128();
108
109
- /* Load the two words, in memory order. */
110
- tcg_gen_qemu_ld_i64(d1, clean_addr, memidx,
111
- MO_64 | MO_ALIGN_16 | s->be_data);
112
- tcg_gen_addi_i64(a2, clean_addr, 8);
113
- tcg_gen_qemu_ld_i64(d2, a2, memidx, MO_64 | s->be_data);
114
+ if (s->be_data == MO_LE) {
115
+ tcg_gen_concat_i64_i128(val, t1, t2);
116
+ tcg_gen_concat_i64_i128(cmp, s1, s2);
117
+ } else {
118
+ tcg_gen_concat_i64_i128(val, t2, t1);
119
+ tcg_gen_concat_i64_i128(cmp, s2, s1);
120
+ }
121
122
- /* Compare the two words, also in memory order. */
123
- tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
124
- tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
125
- tcg_gen_and_i64(c2, c2, c1);
126
+ tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx,
127
+ MO_128 | MO_ALIGN | s->be_data);
128
+ tcg_temp_free_i128(val);
129
130
- /* If compare equal, write back new data, else write back old data. */
131
- tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
132
- tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
133
- tcg_gen_qemu_st_i64(c1, clean_addr, memidx, MO_64 | s->be_data);
134
- tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
135
- tcg_temp_free_i64(a2);
136
- tcg_temp_free_i64(c1);
137
- tcg_temp_free_i64(c2);
138
-
139
- /* Write back the data from memory to Rs. */
140
- tcg_gen_mov_i64(s1, d1);
141
- tcg_gen_mov_i64(s2, d2);
142
- tcg_temp_free_i64(d1);
143
- tcg_temp_free_i64(d2);
144
+ if (s->be_data == MO_LE) {
145
+ tcg_gen_extr_i128_i64(s1, s2, cmp);
146
+ } else {
147
+ tcg_gen_extr_i128_i64(s2, s1, cmp);
148
+ }
149
+ tcg_temp_free_i128(cmp);
150
}
151
}
152
153
--
154
2.34.1
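Both the STXP and CASP conversions build and tear apart 128-bit values with tcg_gen_concat_i64_i128 and tcg_gen_extr_i128_i64, with the low word first as used in the diffs above. A C model of that argument order, assuming a compiler with __int128:

    #include <stdint.h>

    typedef unsigned __int128 u128;

    /* Model of tcg_gen_concat_i64_i128(dst, lo, hi). */
    static u128 concat64(uint64_t lo, uint64_t hi)
    {
        return ((u128)hi << 64) | lo;
    }

    /* Model of tcg_gen_extr_i128_i64(lo, hi, src), the inverse. */
    static void extr64(uint64_t *lo, uint64_t *hi, u128 src)
    {
        *lo = (uint64_t)src;
        *hi = (uint64_t)(src >> 64);
    }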
New patch
1
1
Note that the previous direct reference to reserve_val,
2
3
- tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
4
- ? offsetof(CPUPPCState, reserve_val2)
5
- : offsetof(CPUPPCState, reserve_val)));
6
7
was incorrect because all such references should have gone through
8
the cpu_reserve_val TCG global. Create a cpu_reserve_val2 TCG
global to fix this.
9
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
12
Message-Id: <20221112061122.2720163-2-richard.henderson@linaro.org>
13
---
14
target/ppc/helper.h | 2 -
15
target/ppc/mem_helper.c | 44 -----------------
16
target/ppc/translate.c | 102 ++++++++++++++++++----------------------
17
3 files changed, 47 insertions(+), 101 deletions(-)
18
19
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
20
index XXXXXXX..XXXXXXX 100644
21
--- a/target/ppc/helper.h
22
+++ b/target/ppc/helper.h
23
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG,
24
void, env, tl, i64, i64, i32)
25
DEF_HELPER_FLAGS_5(stq_be_parallel, TCG_CALL_NO_WG,
26
void, env, tl, i64, i64, i32)
27
-DEF_HELPER_5(stqcx_le_parallel, i32, env, tl, i64, i64, i32)
28
-DEF_HELPER_5(stqcx_be_parallel, i32, env, tl, i64, i64, i32)
29
#endif
30
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/ppc/mem_helper.c
33
+++ b/target/ppc/mem_helper.c
34
@@ -XXX,XX +XXX,XX @@ void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr,
35
val = int128_make128(lo, hi);
36
cpu_atomic_sto_be_mmu(env, addr, val, opidx, GETPC());
37
}
38
-
39
-uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr,
40
- uint64_t new_lo, uint64_t new_hi,
41
- uint32_t opidx)
42
-{
43
- bool success = false;
44
-
45
- /* We will have raised EXCP_ATOMIC from the translator. */
46
- assert(HAVE_CMPXCHG128);
47
-
48
- if (likely(addr == env->reserve_addr)) {
49
- Int128 oldv, cmpv, newv;
50
-
51
- cmpv = int128_make128(env->reserve_val2, env->reserve_val);
52
- newv = int128_make128(new_lo, new_hi);
53
- oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv,
54
- opidx, GETPC());
55
- success = int128_eq(oldv, cmpv);
56
- }
57
- env->reserve_addr = -1;
58
- return env->so + success * CRF_EQ_BIT;
59
-}
60
-
61
-uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr,
62
- uint64_t new_lo, uint64_t new_hi,
63
- uint32_t opidx)
64
-{
65
- bool success = false;
66
-
67
- /* We will have raised EXCP_ATOMIC from the translator. */
68
- assert(HAVE_CMPXCHG128);
69
-
70
- if (likely(addr == env->reserve_addr)) {
71
- Int128 oldv, cmpv, newv;
72
-
73
- cmpv = int128_make128(env->reserve_val2, env->reserve_val);
74
- newv = int128_make128(new_lo, new_hi);
75
- oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv,
76
- opidx, GETPC());
77
- success = int128_eq(oldv, cmpv);
78
- }
79
- env->reserve_addr = -1;
80
- return env->so + success * CRF_EQ_BIT;
81
-}
82
#endif
83
84
/*****************************************************************************/
85
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
86
index XXXXXXX..XXXXXXX 100644
87
--- a/target/ppc/translate.c
88
+++ b/target/ppc/translate.c
89
@@ -XXX,XX +XXX,XX @@ static TCGv cpu_cfar;
90
static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32;
91
static TCGv cpu_reserve;
92
static TCGv cpu_reserve_val;
93
+static TCGv cpu_reserve_val2;
94
static TCGv cpu_fpscr;
95
static TCGv_i32 cpu_access_type;
96
97
@@ -XXX,XX +XXX,XX @@ void ppc_translate_init(void)
98
offsetof(CPUPPCState, reserve_addr),
99
"reserve_addr");
100
cpu_reserve_val = tcg_global_mem_new(cpu_env,
101
- offsetof(CPUPPCState, reserve_val),
102
- "reserve_val");
103
+ offsetof(CPUPPCState, reserve_val),
104
+ "reserve_val");
105
+ cpu_reserve_val2 = tcg_global_mem_new(cpu_env,
106
+ offsetof(CPUPPCState, reserve_val2),
107
+ "reserve_val2");
108
109
cpu_fpscr = tcg_global_mem_new(cpu_env,
110
offsetof(CPUPPCState, fpscr), "fpscr");
111
@@ -XXX,XX +XXX,XX @@ static void gen_lqarx(DisasContext *ctx)
112
/* stqcx. */
113
static void gen_stqcx_(DisasContext *ctx)
114
{
115
+ TCGLabel *lab_fail, *lab_over;
116
int rs = rS(ctx->opcode);
117
- TCGv EA, hi, lo;
118
+ TCGv EA, t0, t1;
119
+ TCGv_i128 cmp, val;
120
121
if (unlikely(rs & 1)) {
122
gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
123
return;
124
}
125
126
+ lab_fail = gen_new_label();
127
+ lab_over = gen_new_label();
128
+
129
gen_set_access_type(ctx, ACCESS_RES);
130
EA = tcg_temp_new();
131
gen_addr_reg_index(ctx, EA);
132
133
+ tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, lab_fail);
134
+ tcg_temp_free(EA);
135
+
136
+ cmp = tcg_temp_new_i128();
137
+ val = tcg_temp_new_i128();
138
+
139
+ tcg_gen_concat_i64_i128(cmp, cpu_reserve_val2, cpu_reserve_val);
140
+
141
/* Note that the low part is always in RS+1, even in LE mode. */
142
- lo = cpu_gpr[rs + 1];
143
- hi = cpu_gpr[rs];
144
+ tcg_gen_concat_i64_i128(val, cpu_gpr[rs + 1], cpu_gpr[rs]);
145
146
- if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
147
- if (HAVE_CMPXCHG128) {
148
- TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_128) | MO_ALIGN);
149
- if (ctx->le_mode) {
150
- gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env,
151
- EA, lo, hi, oi);
152
- } else {
153
- gen_helper_stqcx_be_parallel(cpu_crf[0], cpu_env,
154
- EA, lo, hi, oi);
155
- }
156
- tcg_temp_free_i32(oi);
157
- } else {
158
- /* Restart with exclusive lock. */
159
- gen_helper_exit_atomic(cpu_env);
160
- ctx->base.is_jmp = DISAS_NORETURN;
161
- }
162
- tcg_temp_free(EA);
163
- } else {
164
- TCGLabel *lab_fail = gen_new_label();
165
- TCGLabel *lab_over = gen_new_label();
166
- TCGv_i64 t0 = tcg_temp_new_i64();
167
- TCGv_i64 t1 = tcg_temp_new_i64();
168
+ tcg_gen_atomic_cmpxchg_i128(val, cpu_reserve, cmp, val, ctx->mem_idx,
169
+ DEF_MEMOP(MO_128 | MO_ALIGN));
170
+ tcg_temp_free_i128(cmp);
171
172
- tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, lab_fail);
173
- tcg_temp_free(EA);
174
+ t0 = tcg_temp_new();
175
+ t1 = tcg_temp_new();
176
+ tcg_gen_extr_i128_i64(t1, t0, val);
177
+ tcg_temp_free_i128(val);
178
179
- gen_qemu_ld64_i64(ctx, t0, cpu_reserve);
180
- tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
181
- ? offsetof(CPUPPCState, reserve_val2)
182
- : offsetof(CPUPPCState, reserve_val)));
183
- tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, lab_fail);
184
+ tcg_gen_xor_tl(t1, t1, cpu_reserve_val2);
185
+ tcg_gen_xor_tl(t0, t0, cpu_reserve_val);
186
+ tcg_gen_or_tl(t0, t0, t1);
187
+ tcg_temp_free(t1);
188
189
- tcg_gen_addi_i64(t0, cpu_reserve, 8);
190
- gen_qemu_ld64_i64(ctx, t0, t0);
191
- tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
192
- ? offsetof(CPUPPCState, reserve_val)
193
- : offsetof(CPUPPCState, reserve_val2)));
194
- tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, lab_fail);
195
+ tcg_gen_setcondi_tl(TCG_COND_EQ, t0, t0, 0);
196
+ tcg_gen_shli_tl(t0, t0, CRF_EQ_BIT);
197
+ tcg_gen_or_tl(t0, t0, cpu_so);
198
+ tcg_gen_trunc_tl_i32(cpu_crf[0], t0);
199
+ tcg_temp_free(t0);
200
201
- /* Success */
202
- gen_qemu_st64_i64(ctx, ctx->le_mode ? lo : hi, cpu_reserve);
203
- tcg_gen_addi_i64(t0, cpu_reserve, 8);
204
- gen_qemu_st64_i64(ctx, ctx->le_mode ? hi : lo, t0);
205
+ tcg_gen_br(lab_over);
206
+ gen_set_label(lab_fail);
207
208
- tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
209
- tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], CRF_EQ);
210
- tcg_gen_br(lab_over);
211
+ /*
212
+ * Address mismatch implies failure. But we still need to provide
213
+ * the memory barrier semantics of the instruction.
214
+ */
215
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
216
+ tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
217
218
- gen_set_label(lab_fail);
219
- tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
220
-
221
- gen_set_label(lab_over);
222
- tcg_gen_movi_tl(cpu_reserve, -1);
223
- tcg_temp_free_i64(t0);
224
- tcg_temp_free_i64(t1);
225
- }
226
+ gen_set_label(lab_over);
227
+ tcg_gen_movi_tl(cpu_reserve, -1);
228
}
229
#endif /* defined(TARGET_PPC64) */
230
231
--
232
2.34.1
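On the success path above, CR0 is assembled from the xor/or difference, the EQ bit, and SO. A compact C sketch of that assembly (the wrapper name is illustrative; CRF_EQ_BIT comes from the target headers):

    #include <stdint.h>

    /* cr0 = so | (EQ when the old memory value matched the reservation). */
    static uint32_t compute_cr0(uint64_t diff, uint32_t so,
                                unsigned crf_eq_bit)
    {
        return so | ((uint32_t)(diff == 0) << crf_eq_bit);
    }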
New patch
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
1
2
3
Add a basic test to prevent regressions.
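DR divides the 64-bit value held in the even/odd pair r0:r1 by a 32-bit divisor, leaving the quotient in r1 and the remainder in r0; DLR is the unsigned form. The first test case, modelled in plain C:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        int32_t r0 = -1, r1 = -4241, b = 101;
        /* Dividend is the r0:r1 pair read as one signed 64-bit value. */
        int64_t dividend = (int64_t)(((uint64_t)(uint32_t)r0 << 32)
                                     | (uint32_t)r1);
        assert(dividend / b == -41);    /* quotient -> r1 */
        assert(dividend % b == -100);   /* remainder -> r0 */
        return 0;
    }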
4
5
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
6
Message-Id: <20221101111300.2539919-1-iii@linux.ibm.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tests/tcg/s390x/div.c | 40 +++++++++++++++++++++++++++++++++
10
tests/tcg/s390x/Makefile.target | 1 +
11
2 files changed, 41 insertions(+)
12
create mode 100644 tests/tcg/s390x/div.c
13
14
diff --git a/tests/tcg/s390x/div.c b/tests/tcg/s390x/div.c
15
new file mode 100644
16
index XXXXXXX..XXXXXXX
17
--- /dev/null
18
+++ b/tests/tcg/s390x/div.c
19
@@ -XXX,XX +XXX,XX @@
20
+#include <assert.h>
21
+#include <stdint.h>
22
+
23
+static void test_dr(void)
24
+{
25
+ register int32_t r0 asm("r0") = -1;
26
+ register int32_t r1 asm("r1") = -4241;
27
+ int32_t b = 101, q, r;
28
+
29
+ asm("dr %[r0],%[b]"
30
+ : [r0] "+r" (r0), [r1] "+r" (r1)
31
+ : [b] "r" (b)
32
+ : "cc");
33
+ q = r1;
34
+ r = r0;
35
+ assert(q == -41);
36
+ assert(r == -100);
37
+}
38
+
39
+static void test_dlr(void)
40
+{
41
+ register uint32_t r0 asm("r0") = 0;
42
+ register uint32_t r1 asm("r1") = 4243;
43
+ uint32_t b = 101, q, r;
44
+
45
+ asm("dlr %[r0],%[b]"
46
+ : [r0] "+r" (r0), [r1] "+r" (r1)
47
+ : [b] "r" (b)
48
+ : "cc");
49
+ q = r1;
50
+ r = r0;
51
+ assert(q == 42);
52
+ assert(r == 1);
53
+}
54
+
55
+int main(void)
56
+{
57
+ test_dr();
58
+ test_dlr();
59
+}
60
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
61
index XXXXXXX..XXXXXXX 100644
62
--- a/tests/tcg/s390x/Makefile.target
63
+++ b/tests/tcg/s390x/Makefile.target
64
@@ -XXX,XX +XXX,XX @@ TESTS+=trap
65
TESTS+=signals-s390x
66
TESTS+=branch-relative-long
67
TESTS+=noexec
68
+TESTS+=div
69
70
Z13_TESTS=vistr
71
$(Z13_TESTS): CFLAGS+=-march=z13 -O2
72
--
73
2.34.1
New patch
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
1
2
3
Add a basic test to prevent regressions.
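CLST compares two strings byte by byte using the terminator supplied in r0: cc 0 means equal, cc 1/2 mean the first operand compares low/high, and cc 3 means the CPU-determined limit was reached, hence the retry loop in the test. A simplified C model that ignores the cc 3 case (names are illustrative):

    /* Unsigned byte comparison; a terminator makes that string "low". */
    static int clst_model(unsigned char sep,
                          const unsigned char *s1, const unsigned char *s2)
    {
        for (;; s1++, s2++) {
            unsigned char a = *s1, b = *s2;
            if (a == b) {
                if (a == sep) {
                    return 0;
                }
            } else {
                return (a == sep || (b != sep && a < b)) ? 1 : 2;
            }
        }
    }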
4
5
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
6
Message-Id: <20221025213008.2209006-2-iii@linux.ibm.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tests/tcg/s390x/clst.c | 82 +++++++++++++++++++++++++++++++++
10
tests/tcg/s390x/Makefile.target | 1 +
11
2 files changed, 83 insertions(+)
12
create mode 100644 tests/tcg/s390x/clst.c
13
14
diff --git a/tests/tcg/s390x/clst.c b/tests/tcg/s390x/clst.c
15
new file mode 100644
16
index XXXXXXX..XXXXXXX
17
--- /dev/null
18
+++ b/tests/tcg/s390x/clst.c
19
@@ -XXX,XX +XXX,XX @@
20
+#define _GNU_SOURCE
21
+#include <stdio.h>
22
+#include <stdlib.h>
23
+
24
+static int clst(char sep, const char **s1, const char **s2)
25
+{
26
+ const char *r1 = *s1;
27
+ const char *r2 = *s2;
28
+ int cc;
29
+
30
+ do {
31
+ register int r0 asm("r0") = sep;
32
+
33
+ asm("clst %[r1],%[r2]\n"
34
+ "ipm %[cc]\n"
35
+ "srl %[cc],28"
36
+ : [r1] "+r" (r1), [r2] "+r" (r2), "+r" (r0), [cc] "=r" (cc)
37
+ :
38
+ : "cc");
39
+ *s1 = r1;
40
+ *s2 = r2;
41
+ } while (cc == 3);
42
+
43
+ return cc;
44
+}
45
+
46
+static const struct test {
47
+ const char *name;
48
+ char sep;
49
+ const char *s1;
50
+ const char *s2;
51
+ int exp_cc;
52
+ int exp_off;
53
+} tests[] = {
54
+ {
55
+ .name = "cc0",
56
+ .sep = 0,
57
+ .s1 = "aa",
58
+ .s2 = "aa",
59
+ .exp_cc = 0,
60
+ .exp_off = 0,
61
+ },
62
+ {
63
+ .name = "cc1",
64
+ .sep = 1,
65
+ .s1 = "a\x01",
66
+ .s2 = "aa\x01",
67
+ .exp_cc = 1,
68
+ .exp_off = 1,
69
+ },
70
+ {
71
+ .name = "cc2",
72
+ .sep = 2,
73
+ .s1 = "abc\x02",
74
+ .s2 = "abb\x02",
75
+ .exp_cc = 2,
76
+ .exp_off = 2,
77
+ },
78
+};
79
+
80
+int main(void)
81
+{
82
+ const struct test *t;
83
+ const char *s1, *s2;
84
+ size_t i;
85
+ int cc;
86
+
87
+ for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
88
+ t = &tests[i];
89
+ s1 = t->s1;
90
+ s2 = t->s2;
91
+ cc = clst(t->sep, &s1, &s2);
92
+ if (cc != t->exp_cc ||
93
+ s1 != t->s1 + t->exp_off ||
94
+ s2 != t->s2 + t->exp_off) {
95
+ fprintf(stderr, "%s\n", t->name);
96
+ return EXIT_FAILURE;
97
+ }
98
+ }
99
+
100
+ return EXIT_SUCCESS;
101
+}
102
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
103
index XXXXXXX..XXXXXXX 100644
104
--- a/tests/tcg/s390x/Makefile.target
105
+++ b/tests/tcg/s390x/Makefile.target
106
@@ -XXX,XX +XXX,XX @@ TESTS+=signals-s390x
107
TESTS+=branch-relative-long
108
TESTS+=noexec
109
TESTS+=div
110
+TESTS+=clst
111
112
Z13_TESTS=vistr
113
$(Z13_TESTS): CFLAGS+=-march=z13 -O2
114
--
115
2.34.1
New patch
1
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
2
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
tests/tcg/s390x/long-double.c | 24 ++++++++++++++++++++++++
6
tests/tcg/s390x/Makefile.target | 1 +
7
2 files changed, 25 insertions(+)
8
create mode 100644 tests/tcg/s390x/long-double.c
1
9
10
diff --git a/tests/tcg/s390x/long-double.c b/tests/tcg/s390x/long-double.c
11
new file mode 100644
12
index XXXXXXX..XXXXXXX
13
--- /dev/null
14
+++ b/tests/tcg/s390x/long-double.c
15
@@ -XXX,XX +XXX,XX @@
16
+/*
17
+ * Perform some basic arithmetic with long double, as a sanity check.
18
+ * With small integral numbers, we can cross-check with integers.
19
+ */
20
+
21
+#include <assert.h>
22
+
23
+int main()
24
+{
25
+ int i, j;
26
+
27
+ for (i = 1; i < 5; i++) {
28
+ for (j = 1; j < 5; j++) {
29
+ long double la = (long double)i + j;
30
+ long double lm = (long double)i * j;
31
+ long double ls = (long double)i - j;
32
+
33
+ assert(la == i + j);
34
+ assert(lm == i * j);
35
+ assert(ls == i - j);
36
+ }
37
+ }
38
+ return 0;
39
+}
40
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
41
index XXXXXXX..XXXXXXX 100644
42
--- a/tests/tcg/s390x/Makefile.target
43
+++ b/tests/tcg/s390x/Makefile.target
44
@@ -XXX,XX +XXX,XX @@ TESTS+=branch-relative-long
45
TESTS+=noexec
46
TESTS+=div
47
TESTS+=clst
48
+TESTS+=long-double
49
50
Z13_TESTS=vistr
51
$(Z13_TESTS): CFLAGS+=-march=z13 -O2
52
--
53
2.34.1
54
55
New patch
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
1
2
3
Add a simple test to prevent regressions.
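CDSG is a 16-byte compare-and-swap on an aligned quadword: the r0:r1 pair holds the expected value and is refreshed with the actual memory contents on failure. On hosts with a 128-bit CAS, roughly the same operation can be sketched with the GCC/Clang builtin (illustrative only; may need -mcx16 or libatomic):

    #include <stdbool.h>

    typedef unsigned __int128 u128;

    /* Returns true on success; on failure *expected is refreshed from
       memory, mirroring CDSG's update of r0:r1. */
    static bool cas128(u128 *mem, u128 *expected, u128 desired)
    {
        return __atomic_compare_exchange_n(mem, expected, desired, false,
                                           __ATOMIC_SEQ_CST,
                                           __ATOMIC_SEQ_CST);
    }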
4
5
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
6
Message-Id: <20230201133257.3223115-1-iii@linux.ibm.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tests/tcg/s390x/cdsg.c | 93 +++++++++++++++++++++++++++++++++
10
tests/tcg/s390x/Makefile.target | 4 ++
11
2 files changed, 97 insertions(+)
12
create mode 100644 tests/tcg/s390x/cdsg.c
13
14
diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c
15
new file mode 100644
16
index XXXXXXX..XXXXXXX
17
--- /dev/null
18
+++ b/tests/tcg/s390x/cdsg.c
19
@@ -XXX,XX +XXX,XX @@
20
+/*
21
+ * Test CDSG instruction.
22
+ *
23
+ * Increment the first half of aligned_quadword by 1, and the second half by 2
24
+ * from 2 threads. Verify that the result is consistent.
25
+ *
26
+ * SPDX-License-Identifier: GPL-2.0-or-later
27
+ */
28
+#include <assert.h>
29
+#include <pthread.h>
30
+#include <stdbool.h>
31
+#include <stdlib.h>
32
+
33
+static volatile bool start;
34
+typedef unsigned long aligned_quadword[2] __attribute__((__aligned__(16)));
35
+static aligned_quadword val;
36
+static const int n_iterations = 1000000;
37
+
38
+static inline int cdsg(unsigned long *orig0, unsigned long *orig1,
39
+ unsigned long new0, unsigned long new1,
40
+ aligned_quadword *mem)
41
+{
42
+ register unsigned long r0 asm("r0");
43
+ register unsigned long r1 asm("r1");
44
+ register unsigned long r2 asm("r2");
45
+ register unsigned long r3 asm("r3");
46
+ int cc;
47
+
48
+ r0 = *orig0;
49
+ r1 = *orig1;
50
+ r2 = new0;
51
+ r3 = new1;
52
+ asm("cdsg %[r0],%[r2],%[db2]\n"
53
+ "ipm %[cc]"
54
+ : [r0] "+r" (r0)
55
+ , [r1] "+r" (r1)
56
+ , [db2] "+m" (*mem)
57
+ , [cc] "=r" (cc)
58
+ : [r2] "r" (r2)
59
+ , [r3] "r" (r3)
60
+ : "cc");
61
+ *orig0 = r0;
62
+ *orig1 = r1;
63
+
64
+ return (cc >> 28) & 3;
65
+}
66
+
67
+void *cdsg_loop(void *arg)
68
+{
69
+ unsigned long orig0, orig1, new0, new1;
70
+ int cc;
71
+ int i;
72
+
73
+ while (!start) {
74
+ }
75
+
76
+ orig0 = val[0];
77
+ orig1 = val[1];
78
+ for (i = 0; i < n_iterations;) {
79
+ new0 = orig0 + 1;
80
+ new1 = orig1 + 2;
81
+
82
+ cc = cdsg(&orig0, &orig1, new0, new1, &val);
83
+
84
+ if (cc == 0) {
85
+ orig0 = new0;
86
+ orig1 = new1;
87
+ i++;
88
+ } else {
89
+ assert(cc == 1);
90
+ }
91
+ }
92
+
93
+ return NULL;
94
+}
95
+
96
+int main(void)
97
+{
98
+ pthread_t thread;
99
+ int ret;
100
+
101
+ ret = pthread_create(&thread, NULL, cdsg_loop, NULL);
102
+ assert(ret == 0);
103
+ start = true;
104
+ cdsg_loop(NULL);
105
+ ret = pthread_join(thread, NULL);
106
+ assert(ret == 0);
107
+
108
+ assert(val[0] == n_iterations * 2);
109
+ assert(val[1] == n_iterations * 4);
110
+
111
+ return EXIT_SUCCESS;
112
+}
113
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
114
index XXXXXXX..XXXXXXX 100644
115
--- a/tests/tcg/s390x/Makefile.target
116
+++ b/tests/tcg/s390x/Makefile.target
117
@@ -XXX,XX +XXX,XX @@ TESTS+=noexec
118
TESTS+=div
119
TESTS+=clst
120
TESTS+=long-double
121
+TESTS+=cdsg
122
+
123
+cdsg: CFLAGS+=-pthread
124
+cdsg: LDFLAGS+=-pthread
125
126
Z13_TESTS=vistr
127
$(Z13_TESTS): CFLAGS+=-march=z13 -O2
128
--
129
2.34.1
New patch
1
Pack the quotient and remainder into a single uint64_t.
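Here deposit64(q, 32, 32, r) places the 32-bit remainder above the 32-bit quotient, and tcg_gen_extr32_i64 in the translator splits them back out. Both reduce to shifts and masks, as this sketch shows (helper names are illustrative):

    #include <stdint.h>

    /* Equivalent of deposit64(q, 32, 32, r) for this use. */
    static uint64_t pack_qr(int32_t q, int32_t r)
    {
        return ((uint64_t)(uint32_t)r << 32) | (uint32_t)q;
    }

    /* Equivalent of the extr32 split: quotient low, remainder high. */
    static void unpack_qr(uint64_t v, int32_t *q, int32_t *r)
    {
        *q = (int32_t)v;
        *r = (int32_t)(v >> 32);
    }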
1
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: David Hildenbrand <david@redhat.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
v2: Fix operand ordering; use tcg_extr32_i64.
8
---
9
target/s390x/helper.h | 2 +-
10
target/s390x/tcg/int_helper.c | 26 +++++++++++++-------------
11
target/s390x/tcg/translate.c | 8 ++++----
12
3 files changed, 18 insertions(+), 18 deletions(-)
13
14
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/s390x/helper.h
17
+++ b/target/s390x/helper.h
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(clc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
19
DEF_HELPER_3(mvcl, i32, env, i32, i32)
20
DEF_HELPER_3(clcl, i32, env, i32, i32)
21
DEF_HELPER_FLAGS_4(clm, TCG_CALL_NO_WG, i32, env, i32, i32, i64)
22
-DEF_HELPER_FLAGS_3(divs32, TCG_CALL_NO_WG, s64, env, s64, s64)
23
+DEF_HELPER_FLAGS_3(divs32, TCG_CALL_NO_WG, i64, env, s64, s64)
24
DEF_HELPER_FLAGS_3(divu32, TCG_CALL_NO_WG, i64, env, i64, i64)
25
DEF_HELPER_FLAGS_3(divs64, TCG_CALL_NO_WG, s64, env, s64, s64)
26
DEF_HELPER_FLAGS_4(divu64, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
27
diff --git a/target/s390x/tcg/int_helper.c b/target/s390x/tcg/int_helper.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/s390x/tcg/int_helper.c
30
+++ b/target/s390x/tcg/int_helper.c
31
@@ -XXX,XX +XXX,XX @@
32
#endif
33
34
/* 64/32 -> 32 signed division */
35
-int64_t HELPER(divs32)(CPUS390XState *env, int64_t a, int64_t b64)
36
+uint64_t HELPER(divs32)(CPUS390XState *env, int64_t a, int64_t b64)
37
{
38
- int32_t ret, b = b64;
39
- int64_t q;
40
+ int32_t b = b64;
41
+ int64_t q, r;
42
43
if (b == 0) {
44
tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
45
}
46
47
- ret = q = a / b;
48
- env->retxl = a % b;
49
+ q = a / b;
50
+ r = a % b;
51
52
/* Catch non-representable quotient. */
53
- if (ret != q) {
54
+ if (q != (int32_t)q) {
55
tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
56
}
57
58
- return ret;
59
+ return deposit64(q, 32, 32, r);
60
}
61
62
/* 64/32 -> 32 unsigned division */
63
uint64_t HELPER(divu32)(CPUS390XState *env, uint64_t a, uint64_t b64)
64
{
65
- uint32_t ret, b = b64;
66
- uint64_t q;
67
+ uint32_t b = b64;
68
+ uint64_t q, r;
69
70
if (b == 0) {
71
tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
72
}
73
74
- ret = q = a / b;
75
- env->retxl = a % b;
76
+ q = a / b;
77
+ r = a % b;
78
79
/* Catch non-representable quotient. */
80
- if (ret != q) {
81
+ if (q != (uint32_t)q) {
82
tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
83
}
84
85
- return ret;
86
+ return deposit64(q, 32, 32, r);
87
}
88
89
/* 64/64 -> 64 signed division */
90
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
91
index XXXXXXX..XXXXXXX 100644
92
--- a/target/s390x/tcg/translate.c
93
+++ b/target/s390x/tcg/translate.c
94
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_diag(DisasContext *s, DisasOps *o)
95
96
static DisasJumpType op_divs32(DisasContext *s, DisasOps *o)
97
{
98
- gen_helper_divs32(o->out2, cpu_env, o->in1, o->in2);
99
- return_low128(o->out);
100
+ gen_helper_divs32(o->out, cpu_env, o->in1, o->in2);
101
+ tcg_gen_extr32_i64(o->out2, o->out, o->out);
102
return DISAS_NEXT;
103
}
104
105
static DisasJumpType op_divu32(DisasContext *s, DisasOps *o)
106
{
107
- gen_helper_divu32(o->out2, cpu_env, o->in1, o->in2);
108
- return_low128(o->out);
109
+ gen_helper_divu32(o->out, cpu_env, o->in1, o->in2);
110
+ tcg_gen_extr32_i64(o->out2, o->out, o->out);
111
return DISAS_NEXT;
112
}
113
114
--
115
2.34.1
116
117
diff view generated by jsdifflib
New patch
1
Pack the quotient and remainder into a single Int128.
2
Use the divu128 primitive to remove the cpu_abort on
3
32-bit hosts.
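divu128(&lo, &hi, b) divides the 128-bit value hi:lo in place and returns the remainder; the helper then signals overflow whenever the high quotient word is nonzero. Where __int128 is available, the primitive behaves like this simplified model:

    #include <stdint.h>

    typedef unsigned __int128 u128;

    /* Simplified model of the divu128 host-utils primitive. */
    static uint64_t divu128_model(uint64_t *plow, uint64_t *phigh,
                                  uint64_t b)
    {
        u128 a = ((u128)*phigh << 64) | *plow;
        uint64_t rem = (uint64_t)(a % b);
        u128 q = a / b;
        *plow = (uint64_t)q;
        *phigh = (uint64_t)(q >> 64);
        return rem;
    }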
1
4
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
v2: Extended div test case to cover these insns.
10
---
11
target/s390x/helper.h | 4 ++--
12
target/s390x/tcg/int_helper.c | 38 +++++++++--------------------------
13
target/s390x/tcg/translate.c | 14 +++++++++----
14
tests/tcg/s390x/div.c | 35 ++++++++++++++++++++++++++++++++
15
4 files changed, 56 insertions(+), 35 deletions(-)
16
17
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/s390x/helper.h
20
+++ b/target/s390x/helper.h
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(clcl, i32, env, i32, i32)
22
DEF_HELPER_FLAGS_4(clm, TCG_CALL_NO_WG, i32, env, i32, i32, i64)
23
DEF_HELPER_FLAGS_3(divs32, TCG_CALL_NO_WG, i64, env, s64, s64)
24
DEF_HELPER_FLAGS_3(divu32, TCG_CALL_NO_WG, i64, env, i64, i64)
25
-DEF_HELPER_FLAGS_3(divs64, TCG_CALL_NO_WG, s64, env, s64, s64)
26
-DEF_HELPER_FLAGS_4(divu64, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
27
+DEF_HELPER_FLAGS_3(divs64, TCG_CALL_NO_WG, i128, env, s64, s64)
28
+DEF_HELPER_FLAGS_4(divu64, TCG_CALL_NO_WG, i128, env, i64, i64, i64)
29
DEF_HELPER_3(srst, void, env, i32, i32)
30
DEF_HELPER_3(srstu, void, env, i32, i32)
31
DEF_HELPER_4(clst, i64, env, i64, i64, i64)
32
diff --git a/target/s390x/tcg/int_helper.c b/target/s390x/tcg/int_helper.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/s390x/tcg/int_helper.c
35
+++ b/target/s390x/tcg/int_helper.c
36
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(divu32)(CPUS390XState *env, uint64_t a, uint64_t b64)
37
}
38
39
/* 64/64 -> 64 signed division */
40
-int64_t HELPER(divs64)(CPUS390XState *env, int64_t a, int64_t b)
41
+Int128 HELPER(divs64)(CPUS390XState *env, int64_t a, int64_t b)
42
{
43
/* Catch divide by zero, and non-representable quotient (MIN / -1). */
44
if (b == 0 || (b == -1 && a == (1ll << 63))) {
45
tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
46
}
47
- env->retxl = a % b;
48
- return a / b;
49
+ return int128_make128(a / b, a % b);
50
}
51
52
/* 128 -> 64/64 unsigned division */
53
-uint64_t HELPER(divu64)(CPUS390XState *env, uint64_t ah, uint64_t al,
54
- uint64_t b)
55
+Int128 HELPER(divu64)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t b)
56
{
57
- uint64_t ret;
58
- /* Signal divide by zero. */
59
- if (b == 0) {
60
- tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
61
- }
62
- if (ah == 0) {
63
- /* 64 -> 64/64 case */
64
- env->retxl = al % b;
65
- ret = al / b;
66
- } else {
67
- /* ??? Move i386 idivq helper to host-utils. */
68
-#ifdef CONFIG_INT128
69
- __uint128_t a = ((__uint128_t)ah << 64) | al;
70
- __uint128_t q = a / b;
71
- env->retxl = a % b;
72
- ret = q;
73
- if (ret != q) {
74
- tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
75
+ if (b != 0) {
76
+ uint64_t r = divu128(&al, &ah, b);
77
+ if (ah == 0) {
78
+ return int128_make128(al, r);
79
}
80
-#else
81
- /* 32-bit hosts would need special wrapper functionality - just abort if
82
- we encounter such a case; it's very unlikely anyways. */
83
- cpu_abort(env_cpu(env), "128 -> 64/64 division not implemented\n");
84
-#endif
85
}
86
- return ret;
87
+ /* divide by zero or overflow */
88
+ tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
89
}
90
91
uint64_t HELPER(cvd)(int32_t reg)
92
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
93
index XXXXXXX..XXXXXXX 100644
94
--- a/target/s390x/tcg/translate.c
95
+++ b/target/s390x/tcg/translate.c
96
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_divu32(DisasContext *s, DisasOps *o)
97
98
static DisasJumpType op_divs64(DisasContext *s, DisasOps *o)
99
{
100
- gen_helper_divs64(o->out2, cpu_env, o->in1, o->in2);
101
- return_low128(o->out);
102
+ TCGv_i128 t = tcg_temp_new_i128();
103
+
104
+ gen_helper_divs64(t, cpu_env, o->in1, o->in2);
105
+ tcg_gen_extr_i128_i64(o->out2, o->out, t);
106
+ tcg_temp_free_i128(t);
107
return DISAS_NEXT;
108
}
109
110
static DisasJumpType op_divu64(DisasContext *s, DisasOps *o)
111
{
112
- gen_helper_divu64(o->out2, cpu_env, o->out, o->out2, o->in2);
113
- return_low128(o->out);
114
+ TCGv_i128 t = tcg_temp_new_i128();
115
+
116
+ gen_helper_divu64(t, cpu_env, o->out, o->out2, o->in2);
117
+ tcg_gen_extr_i128_i64(o->out2, o->out, t);
118
+ tcg_temp_free_i128(t);
119
return DISAS_NEXT;
120
}
121
122
diff --git a/tests/tcg/s390x/div.c b/tests/tcg/s390x/div.c
123
index XXXXXXX..XXXXXXX 100644
124
--- a/tests/tcg/s390x/div.c
125
+++ b/tests/tcg/s390x/div.c
126
@@ -XXX,XX +XXX,XX @@ static void test_dlr(void)
127
assert(r == 1);
128
}
129
130
+static void test_dsgr(void)
131
+{
132
+ register int64_t r0 asm("r0") = -1;
133
+ register int64_t r1 asm("r1") = -4241;
134
+ int64_t b = 101, q, r;
135
+
136
+ asm("dsgr %[r0],%[b]"
137
+ : [r0] "+r" (r0), [r1] "+r" (r1)
138
+ : [b] "r" (b)
139
+ : "cc");
140
+ q = r1;
141
+ r = r0;
142
+ assert(q == -41);
143
+ assert(r == -100);
144
+}
145
+
146
+static void test_dlgr(void)
147
+{
148
+ register uint64_t r0 asm("r0") = 0;
149
+ register uint64_t r1 asm("r1") = 4243;
150
+ uint64_t b = 101, q, r;
151
+
152
+ asm("dlgr %[r0],%[b]"
153
+ : [r0] "+r" (r0), [r1] "+r" (r1)
154
+ : [b] "r" (b)
155
+ : "cc");
156
+ q = r1;
157
+ r = r0;
158
+ assert(q == 42);
159
+ assert(r == 1);
160
+}
161
+
162
int main(void)
163
{
164
test_dr();
165
test_dlr();
166
+ test_dsgr();
167
+ test_dlgr();
168
+ return 0;
169
}
170
--
171
2.34.1
172
173
New patch
1
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
target/s390x/helper.h | 2 +-
6
target/s390x/tcg/mem_helper.c | 11 ++++-------
7
target/s390x/tcg/translate.c | 8 ++++++--
8
3 files changed, 11 insertions(+), 10 deletions(-)
1
9
10
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/s390x/helper.h
13
+++ b/target/s390x/helper.h
14
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(divs64, TCG_CALL_NO_WG, i128, env, s64, s64)
15
DEF_HELPER_FLAGS_4(divu64, TCG_CALL_NO_WG, i128, env, i64, i64, i64)
16
DEF_HELPER_3(srst, void, env, i32, i32)
17
DEF_HELPER_3(srstu, void, env, i32, i32)
18
-DEF_HELPER_4(clst, i64, env, i64, i64, i64)
19
+DEF_HELPER_4(clst, i128, env, i64, i64, i64)
20
DEF_HELPER_FLAGS_4(mvn, TCG_CALL_NO_WG, void, env, i32, i64, i64)
21
DEF_HELPER_FLAGS_4(mvo, TCG_CALL_NO_WG, void, env, i32, i64, i64)
22
DEF_HELPER_FLAGS_4(mvpg, TCG_CALL_NO_WG, i32, env, i64, i32, i32)
23
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/target/s390x/tcg/mem_helper.c
26
+++ b/target/s390x/tcg/mem_helper.c
27
@@ -XXX,XX +XXX,XX @@ void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
28
}
29
30
/* unsigned string compare (c is string terminator) */
31
-uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
32
+Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
33
{
34
uintptr_t ra = GETPC();
35
uint32_t len;
36
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
37
if (v1 == c) {
38
/* Equal. CC=0, and don't advance the registers. */
39
env->cc_op = 0;
40
- env->retxl = s2;
41
- return s1;
42
+ return int128_make128(s2, s1);
43
}
44
} else {
45
/* Unequal. CC={1,2}, and advance the registers. Note that
46
the terminator need not be zero, but the string that contains
47
the terminator is by definition "low". */
48
env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
49
- env->retxl = s2 + len;
50
- return s1 + len;
51
+ return int128_make128(s2 + len, s1 + len);
52
}
53
}
54
55
/* CPU-determined bytes equal; advance the registers. */
56
env->cc_op = 3;
57
- env->retxl = s2 + len;
58
- return s1 + len;
59
+ return int128_make128(s2 + len, s1 + len);
60
}
61
62
/* move page */
63
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
64
index XXXXXXX..XXXXXXX 100644
65
--- a/target/s390x/tcg/translate.c
66
+++ b/target/s390x/tcg/translate.c
67
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_clm(DisasContext *s, DisasOps *o)
68
69
static DisasJumpType op_clst(DisasContext *s, DisasOps *o)
70
{
71
- gen_helper_clst(o->in1, cpu_env, regs[0], o->in1, o->in2);
72
+ TCGv_i128 pair = tcg_temp_new_i128();
73
+
74
+ gen_helper_clst(pair, cpu_env, regs[0], o->in1, o->in2);
75
+ tcg_gen_extr_i128_i64(o->in2, o->in1, pair);
76
+ tcg_temp_free_i128(pair);
77
+
78
set_cc_static(s);
79
- return_low128(o->in2);
80
return DISAS_NEXT;
81
}
82
83
--
84
2.34.1
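As with the other conversions in this series, int128_make128 takes the low word first, which is why the helper returns (s2, s1) pairs and the translator extracts them as (in2, in1). A small demonstration, assuming qemu's Int128 helpers:

    #include <assert.h>
    #include "qemu/int128.h"   /* in-tree header providing Int128 */

    static void int128_order_demo(void)
    {
        Int128 v = int128_make128(0x2222 /* low */, 0x1111 /* high */);
        assert(int128_getlo(v) == 0x2222);
        assert(int128_gethi(v) == 0x1111);
    }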
85
86
New patch
1
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
2
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
target/s390x/helper.h | 2 +-
6
target/s390x/tcg/mem_helper.c | 7 +++----
7
target/s390x/tcg/translate.c | 6 ++++--
8
3 files changed, 8 insertions(+), 7 deletions(-)
1
9
10
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/s390x/helper.h
13
+++ b/target/s390x/helper.h
14
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(tre, i64, env, i64, i64, i64)
15
DEF_HELPER_4(trt, i32, env, i32, i64, i64)
16
DEF_HELPER_4(trtr, i32, env, i32, i64, i64)
17
DEF_HELPER_5(trXX, i32, env, i32, i32, i32, i32)
18
-DEF_HELPER_4(cksm, i64, env, i64, i64, i64)
19
+DEF_HELPER_4(cksm, i128, env, i64, i64, i64)
20
DEF_HELPER_FLAGS_5(calc_cc, TCG_CALL_NO_RWG_SE, i32, env, i32, i64, i64, i64)
21
DEF_HELPER_FLAGS_2(sfpc, TCG_CALL_NO_WG, void, env, i64)
22
DEF_HELPER_FLAGS_2(sfas, TCG_CALL_NO_WG, void, env, i64)
23
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/target/s390x/tcg/mem_helper.c
26
+++ b/target/s390x/tcg/mem_helper.c
27
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
28
}
29
30
/* checksum */
31
-uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
32
- uint64_t src, uint64_t src_len)
33
+Int128 HELPER(cksm)(CPUS390XState *env, uint64_t r1,
34
+ uint64_t src, uint64_t src_len)
35
{
36
uintptr_t ra = GETPC();
37
uint64_t max_len, len;
38
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
39
env->cc_op = (len == src_len ? 0 : 3);
40
41
/* Return both cksm and processed length. */
42
- env->retxl = cksm;
43
- return len;
44
+ return int128_make128(cksm, len);
45
}
46
47
void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
48
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/target/s390x/tcg/translate.c
51
+++ b/target/s390x/tcg/translate.c
52
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cxlgb(DisasContext *s, DisasOps *o)
53
static DisasJumpType op_cksm(DisasContext *s, DisasOps *o)
54
{
55
int r2 = get_field(s, r2);
56
+ TCGv_i128 pair = tcg_temp_new_i128();
57
TCGv_i64 len = tcg_temp_new_i64();
58
59
- gen_helper_cksm(len, cpu_env, o->in1, o->in2, regs[r2 + 1]);
60
+ gen_helper_cksm(pair, cpu_env, o->in1, o->in2, regs[r2 + 1]);
61
set_cc_static(s);
62
- return_low128(o->out);
63
+ tcg_gen_extr_i128_i64(o->out, len, pair);
64
+ tcg_temp_free_i128(pair);
65
66
tcg_gen_add_i64(regs[r2], regs[r2], len);
67
tcg_gen_sub_i64(regs[r2 + 1], regs[r2 + 1], len);
68
--
69
2.34.1
70
71
New patch
1
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
2
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
target/s390x/helper.h | 2 +-
6
target/s390x/tcg/mem_helper.c | 7 +++----
7
target/s390x/tcg/translate.c | 7 +++++--
8
3 files changed, 9 insertions(+), 7 deletions(-)
1
9
10
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/s390x/helper.h
13
+++ b/target/s390x/helper.h
14
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(unpka, TCG_CALL_NO_WG, i32, env, i64, i32, i64)
15
DEF_HELPER_FLAGS_4(unpku, TCG_CALL_NO_WG, i32, env, i64, i32, i64)
16
DEF_HELPER_FLAGS_3(tp, TCG_CALL_NO_WG, i32, env, i64, i32)
17
DEF_HELPER_FLAGS_4(tr, TCG_CALL_NO_WG, void, env, i32, i64, i64)
18
-DEF_HELPER_4(tre, i64, env, i64, i64, i64)
19
+DEF_HELPER_4(tre, i128, env, i64, i64, i64)
20
DEF_HELPER_4(trt, i32, env, i32, i64, i64)
21
DEF_HELPER_4(trtr, i32, env, i32, i64, i64)
22
DEF_HELPER_5(trXX, i32, env, i32, i32, i32, i32)
23
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/target/s390x/tcg/mem_helper.c
26
+++ b/target/s390x/tcg/mem_helper.c
27
@@ -XXX,XX +XXX,XX @@ void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
28
do_helper_tr(env, len, array, trans, GETPC());
29
}
30
31
-uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
32
- uint64_t len, uint64_t trans)
33
+Int128 HELPER(tre)(CPUS390XState *env, uint64_t array,
34
+ uint64_t len, uint64_t trans)
35
{
36
uintptr_t ra = GETPC();
37
uint8_t end = env->regs[0] & 0xff;
38
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
39
}
40
41
env->cc_op = cc;
42
- env->retxl = len - i;
43
- return array + i;
44
+ return int128_make128(len - i, array + i);
45
}
46
47
static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
48
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/target/s390x/tcg/translate.c
51
+++ b/target/s390x/tcg/translate.c
52
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_tr(DisasContext *s, DisasOps *o)
53
54
static DisasJumpType op_tre(DisasContext *s, DisasOps *o)
55
{
56
- gen_helper_tre(o->out, cpu_env, o->out, o->out2, o->in2);
57
- return_low128(o->out2);
58
+ TCGv_i128 pair = tcg_temp_new_i128();
59
+
60
+ gen_helper_tre(pair, cpu_env, o->out, o->out2, o->in2);
61
+ tcg_gen_extr_i128_i64(o->out2, o->out, pair);
62
+ tcg_temp_free_i128(pair);
63
set_cc_static(s);
64
return DISAS_NEXT;
65
}
66
--
67
2.34.1
68
69
New patch
1
Make a copy of wout_x1 before modifying it, named wout_x1_P,
2
emphasizing that it operates on the out/out2 pair. The insns
3
that use x1_P are data movement and will not change to Int128.
1
4
5
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
target/s390x/tcg/insn-data.h.inc | 12 ++++++------
10
target/s390x/tcg/translate.c | 8 ++++++++
11
2 files changed, 14 insertions(+), 6 deletions(-)
12
13
diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/s390x/tcg/insn-data.h.inc
16
+++ b/target/s390x/tcg/insn-data.h.inc
17
@@ -XXX,XX +XXX,XX @@
18
F(0x3800, LER, RR_a, Z, 0, e2, 0, cond_e1e2, mov2, 0, IF_AFP1 | IF_AFP2)
19
F(0x7800, LE, RX_a, Z, 0, m2_32u, 0, e1, mov2, 0, IF_AFP1)
20
F(0xed64, LEY, RXY_a, LD, 0, m2_32u, 0, e1, mov2, 0, IF_AFP1)
21
- F(0xb365, LXR, RRE, Z, x2h, x2l, 0, x1, movx, 0, IF_AFP1)
22
+ F(0xb365, LXR, RRE, Z, x2h, x2l, 0, x1_P, movx, 0, IF_AFP1)
23
/* LOAD IMMEDIATE */
24
C(0xc001, LGFI, RIL_a, EI, 0, i2, 0, r1, mov2, 0)
25
/* LOAD RELATIVE LONG */
26
@@ -XXX,XX +XXX,XX @@
27
C(0xe332, LTGF, RXY_a, GIE, 0, a2, r1, 0, ld32s, s64)
28
F(0xb302, LTEBR, RRE, Z, 0, e2, 0, cond_e1e2, mov2, f32, IF_BFP)
29
F(0xb312, LTDBR, RRE, Z, 0, f2, 0, f1, mov2, f64, IF_BFP)
30
- F(0xb342, LTXBR, RRE, Z, x2h, x2l, 0, x1, movx, f128, IF_BFP)
31
+ F(0xb342, LTXBR, RRE, Z, x2h, x2l, 0, x1_P, movx, f128, IF_BFP)
32
/* LOAD AND TRAP */
33
C(0xe39f, LAT, RXY_a, LAT, 0, m2_32u, r1, 0, lat, 0)
34
C(0xe385, LGAT, RXY_a, LAT, 0, a2, r1, 0, lgat, 0)
35
@@ -XXX,XX +XXX,XX @@
36
C(0xb913, LCGFR, RRE, Z, 0, r2_32s, r1, 0, neg, neg64)
37
F(0xb303, LCEBR, RRE, Z, 0, e2, new, e1, negf32, f32, IF_BFP)
38
F(0xb313, LCDBR, RRE, Z, 0, f2, new, f1, negf64, f64, IF_BFP)
39
- F(0xb343, LCXBR, RRE, Z, x2h, x2l, new_P, x1, negf128, f128, IF_BFP)
40
+ F(0xb343, LCXBR, RRE, Z, x2h, x2l, new_P, x1_P, negf128, f128, IF_BFP)
41
F(0xb373, LCDFR, RRE, FPSSH, 0, f2, new, f1, negf64, 0, IF_AFP1 | IF_AFP2)
42
/* LOAD COUNT TO BLOCK BOUNDARY */
43
C(0xe727, LCBB, RXE, V, la2, 0, r1, 0, lcbb, 0)
44
@@ -XXX,XX +XXX,XX @@
45
C(0xb911, LNGFR, RRE, Z, 0, r2_32s, r1, 0, nabs, nabs64)
46
F(0xb301, LNEBR, RRE, Z, 0, e2, new, e1, nabsf32, f32, IF_BFP)
47
F(0xb311, LNDBR, RRE, Z, 0, f2, new, f1, nabsf64, f64, IF_BFP)
48
- F(0xb341, LNXBR, RRE, Z, x2h, x2l, new_P, x1, nabsf128, f128, IF_BFP)
49
+ F(0xb341, LNXBR, RRE, Z, x2h, x2l, new_P, x1_P, nabsf128, f128, IF_BFP)
50
F(0xb371, LNDFR, RRE, FPSSH, 0, f2, new, f1, nabsf64, 0, IF_AFP1 | IF_AFP2)
51
/* LOAD ON CONDITION */
52
C(0xb9f2, LOCR, RRF_c, LOC, r1, r2, new, r1_32, loc, 0)
53
@@ -XXX,XX +XXX,XX @@
54
C(0xb910, LPGFR, RRE, Z, 0, r2_32s, r1, 0, abs, abs64)
55
F(0xb300, LPEBR, RRE, Z, 0, e2, new, e1, absf32, f32, IF_BFP)
56
F(0xb310, LPDBR, RRE, Z, 0, f2, new, f1, absf64, f64, IF_BFP)
57
- F(0xb340, LPXBR, RRE, Z, x2h, x2l, new_P, x1, absf128, f128, IF_BFP)
58
+ F(0xb340, LPXBR, RRE, Z, x2h, x2l, new_P, x1_P, absf128, f128, IF_BFP)
59
F(0xb370, LPDFR, RRE, FPSSH, 0, f2, new, f1, absf64, 0, IF_AFP1 | IF_AFP2)
60
/* LOAD REVERSED */
61
C(0xb91f, LRVR, RRE, Z, 0, r2_32u, new, r1_32, rev32, 0)
62
@@ -XXX,XX +XXX,XX @@
63
/* LOAD ZERO */
64
F(0xb374, LZER, RRE, Z, 0, 0, 0, e1, zero, 0, IF_AFP1)
65
F(0xb375, LZDR, RRE, Z, 0, 0, 0, f1, zero, 0, IF_AFP1)
66
- F(0xb376, LZXR, RRE, Z, 0, 0, 0, x1, zero2, 0, IF_AFP1)
67
+ F(0xb376, LZXR, RRE, Z, 0, 0, 0, x1_P, zero2, 0, IF_AFP1)
68
69
/* LOAD FPC */
70
F(0xb29d, LFPC, S, Z, 0, m2_32u, 0, 0, sfpc, 0, IF_BFP)
71
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
72
index XXXXXXX..XXXXXXX 100644
73
--- a/target/s390x/tcg/translate.c
74
+++ b/target/s390x/tcg/translate.c
75
@@ -XXX,XX +XXX,XX @@ static void wout_x1(DisasContext *s, DisasOps *o)
76
}
77
#define SPEC_wout_x1 SPEC_r1_f128
78
79
+static void wout_x1_P(DisasContext *s, DisasOps *o)
80
+{
81
+ int f1 = get_field(s, r1);
82
+ store_freg(f1, o->out);
83
+ store_freg(f1 + 2, o->out2);
84
+}
85
+#define SPEC_wout_x1_P SPEC_r1_f128
86
+
87
static void wout_cond_r1r2_32(DisasContext *s, DisasOps *o)
88
{
89
if (get_field(s, r1) != get_field(s, r2)) {
90
--
91
2.34.1
92
93
New patch
1
Acked-by: David Hildenbrand <david@redhat.com>
2
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
v2: Remove extraneous return_low128.
6
---
7
target/s390x/helper.h | 22 +++++++-------
8
target/s390x/tcg/insn-data.h.inc | 20 ++++++-------
9
target/s390x/tcg/fpu_helper.c | 29 +++++++++---------
10
target/s390x/tcg/translate.c | 51 +++++++++++++++++---------------
11
4 files changed, 63 insertions(+), 59 deletions(-)
1
12
13
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/s390x/helper.h
16
+++ b/target/s390x/helper.h
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(clcle, i32, env, i32, i64, i32)
18
DEF_HELPER_4(clclu, i32, env, i32, i64, i32)
19
DEF_HELPER_3(cegb, i64, env, s64, i32)
20
DEF_HELPER_3(cdgb, i64, env, s64, i32)
21
-DEF_HELPER_3(cxgb, i64, env, s64, i32)
22
+DEF_HELPER_3(cxgb, i128, env, s64, i32)
23
DEF_HELPER_3(celgb, i64, env, i64, i32)
24
DEF_HELPER_3(cdlgb, i64, env, i64, i32)
25
-DEF_HELPER_3(cxlgb, i64, env, i64, i32)
26
+DEF_HELPER_3(cxlgb, i128, env, i64, i32)
27
DEF_HELPER_4(cdsg, void, env, i64, i32, i32)
28
DEF_HELPER_4(cdsg_parallel, void, env, i64, i32, i32)
29
DEF_HELPER_4(csst, i32, env, i32, i64, i64)
30
DEF_HELPER_4(csst_parallel, i32, env, i32, i64, i64)
31
DEF_HELPER_FLAGS_3(aeb, TCG_CALL_NO_WG, i64, env, i64, i64)
32
DEF_HELPER_FLAGS_3(adb, TCG_CALL_NO_WG, i64, env, i64, i64)
33
-DEF_HELPER_FLAGS_5(axb, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i64)
34
+DEF_HELPER_FLAGS_5(axb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64)
35
DEF_HELPER_FLAGS_3(seb, TCG_CALL_NO_WG, i64, env, i64, i64)
36
DEF_HELPER_FLAGS_3(sdb, TCG_CALL_NO_WG, i64, env, i64, i64)
37
-DEF_HELPER_FLAGS_5(sxb, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i64)
38
+DEF_HELPER_FLAGS_5(sxb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64)
39
DEF_HELPER_FLAGS_3(deb, TCG_CALL_NO_WG, i64, env, i64, i64)
40
DEF_HELPER_FLAGS_3(ddb, TCG_CALL_NO_WG, i64, env, i64, i64)
41
-DEF_HELPER_FLAGS_5(dxb, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i64)
42
+DEF_HELPER_FLAGS_5(dxb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64)
43
DEF_HELPER_FLAGS_3(meeb, TCG_CALL_NO_WG, i64, env, i64, i64)
44
DEF_HELPER_FLAGS_3(mdeb, TCG_CALL_NO_WG, i64, env, i64, i64)
45
DEF_HELPER_FLAGS_3(mdb, TCG_CALL_NO_WG, i64, env, i64, i64)
46
-DEF_HELPER_FLAGS_5(mxb, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i64)
47
-DEF_HELPER_FLAGS_4(mxdb, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
48
+DEF_HELPER_FLAGS_5(mxb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64)
49
+DEF_HELPER_FLAGS_4(mxdb, TCG_CALL_NO_WG, i128, env, i64, i64, i64)
50
DEF_HELPER_FLAGS_2(ldeb, TCG_CALL_NO_WG, i64, env, i64)
51
DEF_HELPER_FLAGS_4(ldxb, TCG_CALL_NO_WG, i64, env, i64, i64, i32)
52
-DEF_HELPER_FLAGS_2(lxdb, TCG_CALL_NO_WG, i64, env, i64)
53
-DEF_HELPER_FLAGS_2(lxeb, TCG_CALL_NO_WG, i64, env, i64)
54
+DEF_HELPER_FLAGS_2(lxdb, TCG_CALL_NO_WG, i128, env, i64)
55
+DEF_HELPER_FLAGS_2(lxeb, TCG_CALL_NO_WG, i128, env, i64)
56
DEF_HELPER_FLAGS_3(ledb, TCG_CALL_NO_WG, i64, env, i64, i32)
57
DEF_HELPER_FLAGS_4(lexb, TCG_CALL_NO_WG, i64, env, i64, i64, i32)
58
DEF_HELPER_FLAGS_3(ceb, TCG_CALL_NO_WG_SE, i32, env, i64, i64)
59
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(clfdb, i64, env, i64, i32)
60
DEF_HELPER_4(clfxb, i64, env, i64, i64, i32)
61
DEF_HELPER_FLAGS_3(fieb, TCG_CALL_NO_WG, i64, env, i64, i32)
62
DEF_HELPER_FLAGS_3(fidb, TCG_CALL_NO_WG, i64, env, i64, i32)
63
-DEF_HELPER_FLAGS_4(fixb, TCG_CALL_NO_WG, i64, env, i64, i64, i32)
64
+DEF_HELPER_FLAGS_4(fixb, TCG_CALL_NO_WG, i128, env, i64, i64, i32)
65
DEF_HELPER_FLAGS_4(maeb, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
66
DEF_HELPER_FLAGS_4(madb, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
67
DEF_HELPER_FLAGS_4(mseb, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
68
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(tcdb, TCG_CALL_NO_RWG_SE, i32, env, i64, i64)
69
DEF_HELPER_FLAGS_4(tcxb, TCG_CALL_NO_RWG_SE, i32, env, i64, i64, i64)
70
DEF_HELPER_FLAGS_2(sqeb, TCG_CALL_NO_WG, i64, env, i64)
71
DEF_HELPER_FLAGS_2(sqdb, TCG_CALL_NO_WG, i64, env, i64)
72
-DEF_HELPER_FLAGS_3(sqxb, TCG_CALL_NO_WG, i64, env, i64, i64)
73
+DEF_HELPER_FLAGS_3(sqxb, TCG_CALL_NO_WG, i128, env, i64, i64)
74
DEF_HELPER_FLAGS_1(cvd, TCG_CALL_NO_RWG_SE, i64, s32)
75
DEF_HELPER_FLAGS_4(pack, TCG_CALL_NO_WG, void, env, i32, i64, i64)
76
DEF_HELPER_FLAGS_4(pka, TCG_CALL_NO_WG, void, env, i64, i64, i32)
77
diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
78
index XXXXXXX..XXXXXXX 100644
79
--- a/target/s390x/tcg/insn-data.h.inc
80
+++ b/target/s390x/tcg/insn-data.h.inc
81
@@ -XXX,XX +XXX,XX @@
82
/* CONVERT FROM FIXED */
83
F(0xb394, CEFBR, RRF_e, Z, 0, r2_32s, new, e1, cegb, 0, IF_BFP)
84
F(0xb395, CDFBR, RRF_e, Z, 0, r2_32s, new, f1, cdgb, 0, IF_BFP)
85
- F(0xb396, CXFBR, RRF_e, Z, 0, r2_32s, new_P, x1, cxgb, 0, IF_BFP)
86
+ F(0xb396, CXFBR, RRF_e, Z, 0, r2_32s, new_x, x1, cxgb, 0, IF_BFP)
87
F(0xb3a4, CEGBR, RRF_e, Z, 0, r2_o, new, e1, cegb, 0, IF_BFP)
88
F(0xb3a5, CDGBR, RRF_e, Z, 0, r2_o, new, f1, cdgb, 0, IF_BFP)
89
- F(0xb3a6, CXGBR, RRF_e, Z, 0, r2_o, new_P, x1, cxgb, 0, IF_BFP)
90
+ F(0xb3a6, CXGBR, RRF_e, Z, 0, r2_o, new_x, x1, cxgb, 0, IF_BFP)
91
/* CONVERT TO LOGICAL */
92
F(0xb39c, CLFEBR, RRF_e, FPE, 0, e2, new, r1_32, clfeb, 0, IF_BFP)
93
F(0xb39d, CLFDBR, RRF_e, FPE, 0, f2, new, r1_32, clfdb, 0, IF_BFP)
94
@@ -XXX,XX +XXX,XX @@
95
/* CONVERT FROM LOGICAL */
96
F(0xb390, CELFBR, RRF_e, FPE, 0, r2_32u, new, e1, celgb, 0, IF_BFP)
97
F(0xb391, CDLFBR, RRF_e, FPE, 0, r2_32u, new, f1, cdlgb, 0, IF_BFP)
98
- F(0xb392, CXLFBR, RRF_e, FPE, 0, r2_32u, new_P, x1, cxlgb, 0, IF_BFP)
99
+ F(0xb392, CXLFBR, RRF_e, FPE, 0, r2_32u, new_x, x1, cxlgb, 0, IF_BFP)
100
F(0xb3a0, CELGBR, RRF_e, FPE, 0, r2_o, new, e1, celgb, 0, IF_BFP)
101
F(0xb3a1, CDLGBR, RRF_e, FPE, 0, r2_o, new, f1, cdlgb, 0, IF_BFP)
102
- F(0xb3a2, CXLGBR, RRF_e, FPE, 0, r2_o, new_P, x1, cxlgb, 0, IF_BFP)
103
+ F(0xb3a2, CXLGBR, RRF_e, FPE, 0, r2_o, new_x, x1, cxlgb, 0, IF_BFP)
104
105
/* CONVERT UTF-8 TO UTF-16 */
106
D(0xb2a7, CU12, RRF_c, Z, 0, 0, 0, 0, cuXX, 0, 12)
107
@@ -XXX,XX +XXX,XX @@
108
/* LOAD FP INTEGER */
109
F(0xb357, FIEBR, RRF_e, Z, 0, e2, new, e1, fieb, 0, IF_BFP)
110
F(0xb35f, FIDBR, RRF_e, Z, 0, f2, new, f1, fidb, 0, IF_BFP)
111
- F(0xb347, FIXBR, RRF_e, Z, x2h, x2l, new_P, x1, fixb, 0, IF_BFP)
112
+ F(0xb347, FIXBR, RRF_e, Z, x2h, x2l, new_x, x1, fixb, 0, IF_BFP)
113
114
/* LOAD LENGTHENED */
115
F(0xb304, LDEBR, RRE, Z, 0, e2, new, f1, ldeb, 0, IF_BFP)
116
- F(0xb305, LXDBR, RRE, Z, 0, f2, new_P, x1, lxdb, 0, IF_BFP)
117
- F(0xb306, LXEBR, RRE, Z, 0, e2, new_P, x1, lxeb, 0, IF_BFP)
118
+ F(0xb305, LXDBR, RRE, Z, 0, f2, new_x, x1, lxdb, 0, IF_BFP)
119
+ F(0xb306, LXEBR, RRE, Z, 0, e2, new_x, x1, lxeb, 0, IF_BFP)
120
F(0xed04, LDEB, RXE, Z, 0, m2_32u, new, f1, ldeb, 0, IF_BFP)
121
- F(0xed05, LXDB, RXE, Z, 0, m2_64, new_P, x1, lxdb, 0, IF_BFP)
122
- F(0xed06, LXEB, RXE, Z, 0, m2_32u, new_P, x1, lxeb, 0, IF_BFP)
123
+ F(0xed05, LXDB, RXE, Z, 0, m2_64, new_x, x1, lxdb, 0, IF_BFP)
124
+ F(0xed06, LXEB, RXE, Z, 0, m2_32u, new_x, x1, lxeb, 0, IF_BFP)
125
F(0xb324, LDER, RXE, Z, 0, e2, new, f1, lde, 0, IF_AFP1)
126
F(0xed24, LDE, RXE, Z, 0, m2_32u, new, f1, lde, 0, IF_AFP1)
127
/* LOAD ROUNDED */
128
@@ -XXX,XX +XXX,XX @@
129
/* SQUARE ROOT */
130
F(0xb314, SQEBR, RRE, Z, 0, e2, new, e1, sqeb, 0, IF_BFP)
131
F(0xb315, SQDBR, RRE, Z, 0, f2, new, f1, sqdb, 0, IF_BFP)
132
- F(0xb316, SQXBR, RRE, Z, x2h, x2l, new_P, x1, sqxb, 0, IF_BFP)
133
+ F(0xb316, SQXBR, RRE, Z, x2h, x2l, new_x, x1, sqxb, 0, IF_BFP)
134
F(0xed14, SQEB, RXE, Z, 0, m2_32u, new, e1, sqeb, 0, IF_BFP)
135
F(0xed15, SQDB, RXE, Z, 0, m2_64, new, f1, sqdb, 0, IF_BFP)
136
137
diff --git a/target/s390x/tcg/fpu_helper.c b/target/s390x/tcg/fpu_helper.c
138
index XXXXXXX..XXXXXXX 100644
139
--- a/target/s390x/tcg/fpu_helper.c
140
+++ b/target/s390x/tcg/fpu_helper.c
141
@@ -XXX,XX +XXX,XX @@
142
#define HELPER_LOG(x...)
143
#endif
144
145
-#define RET128(F) (env->retxl = F.low, F.high)
146
+static inline Int128 RET128(float128 f)
147
+{
148
+ return int128_make128(f.low, f.high);
149
+}
150
151
uint8_t s390_softfloat_exc_to_ieee(unsigned int exc)
152
{
153
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(adb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
154
}
155
156
/* 128-bit FP addition */
157
-uint64_t HELPER(axb)(CPUS390XState *env, uint64_t ah, uint64_t al,
158
+Int128 HELPER(axb)(CPUS390XState *env, uint64_t ah, uint64_t al,
159
uint64_t bh, uint64_t bl)
160
{
161
float128 ret = float128_add(make_float128(ah, al),
162
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(sdb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
163
}
164
165
/* 128-bit FP subtraction */
166
-uint64_t HELPER(sxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
167
+Int128 HELPER(sxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
168
uint64_t bh, uint64_t bl)
169
{
170
float128 ret = float128_sub(make_float128(ah, al),
171
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(ddb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
172
}
173
174
/* 128-bit FP division */
175
-uint64_t HELPER(dxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
176
+Int128 HELPER(dxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
177
uint64_t bh, uint64_t bl)
178
{
179
float128 ret = float128_div(make_float128(ah, al),
180
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(mdeb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
181
}
182
183
/* 128-bit FP multiplication */
184
-uint64_t HELPER(mxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
185
+Int128 HELPER(mxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
186
uint64_t bh, uint64_t bl)
187
{
188
float128 ret = float128_mul(make_float128(ah, al),
189
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(mxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
190
}
191
192
/* 128/64-bit FP multiplication */
193
-uint64_t HELPER(mxdb)(CPUS390XState *env, uint64_t ah, uint64_t al,
194
- uint64_t f2)
195
+Int128 HELPER(mxdb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t f2)
196
{
197
float128 ret = float64_to_float128(f2, &env->fpu_status);
198
ret = float128_mul(make_float128(ah, al), ret, &env->fpu_status);
199
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(ldxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
200
}
201
202
/* convert 64-bit float to 128-bit float */
203
-uint64_t HELPER(lxdb)(CPUS390XState *env, uint64_t f2)
204
+Int128 HELPER(lxdb)(CPUS390XState *env, uint64_t f2)
205
{
206
float128 ret = float64_to_float128(f2, &env->fpu_status);
207
handle_exceptions(env, false, GETPC());
208
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(lxdb)(CPUS390XState *env, uint64_t f2)
209
}
210
211
/* convert 32-bit float to 128-bit float */
212
-uint64_t HELPER(lxeb)(CPUS390XState *env, uint64_t f2)
213
+Int128 HELPER(lxeb)(CPUS390XState *env, uint64_t f2)
214
{
215
float128 ret = float32_to_float128(f2, &env->fpu_status);
216
handle_exceptions(env, false, GETPC());
217
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(cdgb)(CPUS390XState *env, int64_t v2, uint32_t m34)
218
}
219
220
/* convert 64-bit int to 128-bit float */
221
-uint64_t HELPER(cxgb)(CPUS390XState *env, int64_t v2, uint32_t m34)
222
+Int128 HELPER(cxgb)(CPUS390XState *env, int64_t v2, uint32_t m34)
223
{
224
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
225
float128 ret = int64_to_float128(v2, &env->fpu_status);
226
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(cdlgb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
227
}
228
229
/* convert 64-bit uint to 128-bit float */
230
-uint64_t HELPER(cxlgb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
231
+Int128 HELPER(cxlgb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
232
{
233
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
234
float128 ret = uint64_to_float128(v2, &env->fpu_status);
235
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(fidb)(CPUS390XState *env, uint64_t f2, uint32_t m34)
236
}
237
238
/* round to integer 128-bit */
239
-uint64_t HELPER(fixb)(CPUS390XState *env, uint64_t ah, uint64_t al,
240
- uint32_t m34)
241
+Int128 HELPER(fixb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint32_t m34)
242
{
243
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
244
float128 ret = float128_round_to_int(make_float128(ah, al),
245
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(sqdb)(CPUS390XState *env, uint64_t f2)
246
}
247
248
/* square root 128-bit */
249
-uint64_t HELPER(sqxb)(CPUS390XState *env, uint64_t ah, uint64_t al)
250
+Int128 HELPER(sqxb)(CPUS390XState *env, uint64_t ah, uint64_t al)
251
{
252
float128 ret = float128_sqrt(make_float128(ah, al), &env->fpu_status);
253
handle_exceptions(env, false, GETPC());
254
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
255
index XXXXXXX..XXXXXXX 100644
256
--- a/target/s390x/tcg/translate.c
257
+++ b/target/s390x/tcg/translate.c
258
@@ -XXX,XX +XXX,XX @@ typedef struct {
259
bool g_out, g_out2, g_in1, g_in2;
260
TCGv_i64 out, out2, in1, in2;
261
TCGv_i64 addr1;
262
+ TCGv_i128 out_128;
263
} DisasOps;
264
265
/* Instructions can place constraints on their operands, raising specification
266
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_adb(DisasContext *s, DisasOps *o)
267
268
static DisasJumpType op_axb(DisasContext *s, DisasOps *o)
269
{
270
- gen_helper_axb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2);
271
- return_low128(o->out2);
272
+ gen_helper_axb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2);
273
return DISAS_NEXT;
274
}
275
276
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cxgb(DisasContext *s, DisasOps *o)
277
if (!m34) {
278
return DISAS_NORETURN;
279
}
280
- gen_helper_cxgb(o->out, cpu_env, o->in2, m34);
281
+ gen_helper_cxgb(o->out_128, cpu_env, o->in2, m34);
282
tcg_temp_free_i32(m34);
283
- return_low128(o->out2);
284
return DISAS_NEXT;
285
}
286
287
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cxlgb(DisasContext *s, DisasOps *o)
288
if (!m34) {
289
return DISAS_NORETURN;
290
}
291
- gen_helper_cxlgb(o->out, cpu_env, o->in2, m34);
292
+ gen_helper_cxlgb(o->out_128, cpu_env, o->in2, m34);
293
tcg_temp_free_i32(m34);
294
- return_low128(o->out2);
295
return DISAS_NEXT;
296
}
297
298
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_ddb(DisasContext *s, DisasOps *o)
299
300
static DisasJumpType op_dxb(DisasContext *s, DisasOps *o)
301
{
302
- gen_helper_dxb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2);
303
- return_low128(o->out2);
304
+ gen_helper_dxb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2);
305
return DISAS_NEXT;
306
}
307
308
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_fixb(DisasContext *s, DisasOps *o)
309
if (!m34) {
310
return DISAS_NORETURN;
311
}
312
- gen_helper_fixb(o->out, cpu_env, o->in1, o->in2, m34);
313
- return_low128(o->out2);
314
+ gen_helper_fixb(o->out_128, cpu_env, o->in1, o->in2, m34);
315
tcg_temp_free_i32(m34);
316
return DISAS_NEXT;
317
}
318
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lexb(DisasContext *s, DisasOps *o)
319
320
static DisasJumpType op_lxdb(DisasContext *s, DisasOps *o)
321
{
322
- gen_helper_lxdb(o->out, cpu_env, o->in2);
323
- return_low128(o->out2);
324
+ gen_helper_lxdb(o->out_128, cpu_env, o->in2);
325
return DISAS_NEXT;
326
}
327
328
static DisasJumpType op_lxeb(DisasContext *s, DisasOps *o)
329
{
330
- gen_helper_lxeb(o->out, cpu_env, o->in2);
331
- return_low128(o->out2);
332
+ gen_helper_lxeb(o->out_128, cpu_env, o->in2);
333
return DISAS_NEXT;
334
}
335
336
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_mdb(DisasContext *s, DisasOps *o)
337
338
static DisasJumpType op_mxb(DisasContext *s, DisasOps *o)
339
{
340
- gen_helper_mxb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2);
341
- return_low128(o->out2);
342
+ gen_helper_mxb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2);
343
return DISAS_NEXT;
344
}
345
346
static DisasJumpType op_mxdb(DisasContext *s, DisasOps *o)
347
{
348
- gen_helper_mxdb(o->out, cpu_env, o->out, o->out2, o->in2);
349
- return_low128(o->out2);
350
+ gen_helper_mxdb(o->out_128, cpu_env, o->out, o->out2, o->in2);
351
return DISAS_NEXT;
352
}
353
354
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_sdb(DisasContext *s, DisasOps *o)
355
356
static DisasJumpType op_sxb(DisasContext *s, DisasOps *o)
357
{
358
- gen_helper_sxb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2);
359
- return_low128(o->out2);
360
+ gen_helper_sxb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2);
361
return DISAS_NEXT;
362
}
363
364
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_sqdb(DisasContext *s, DisasOps *o)
365
366
static DisasJumpType op_sqxb(DisasContext *s, DisasOps *o)
367
{
368
- gen_helper_sqxb(o->out, cpu_env, o->in1, o->in2);
369
- return_low128(o->out2);
370
+ gen_helper_sqxb(o->out_128, cpu_env, o->in1, o->in2);
371
return DISAS_NEXT;
372
}
373
374
@@ -XXX,XX +XXX,XX @@ static void prep_new_P(DisasContext *s, DisasOps *o)
375
}
376
#define SPEC_prep_new_P 0
377
378
+static void prep_new_x(DisasContext *s, DisasOps *o)
379
+{
380
+ o->out = tcg_temp_new_i64();
381
+ o->out2 = tcg_temp_new_i64();
382
+ o->out_128 = tcg_temp_new_i128();
383
+}
384
+#define SPEC_prep_new_x 0
385
+
386
static void prep_r1(DisasContext *s, DisasOps *o)
387
{
388
o->out = regs[get_field(s, r1)];
389
@@ -XXX,XX +XXX,XX @@ static void prep_r1_P(DisasContext *s, DisasOps *o)
390
}
391
#define SPEC_prep_r1_P SPEC_r1_even
392
393
-/* Whenever we need x1 in addition to other inputs, we'll load it to out/out2 */
394
static void prep_x1(DisasContext *s, DisasOps *o)
395
{
396
o->out = load_freg(get_field(s, r1));
397
o->out2 = load_freg(get_field(s, r1) + 2);
398
+ o->out_128 = tcg_temp_new_i128();
399
+ tcg_gen_concat_i64_i128(o->out_128, o->out2, o->out);
400
}
401
#define SPEC_prep_x1 SPEC_r1_f128
402
403
@@ -XXX,XX +XXX,XX @@ static void wout_f1(DisasContext *s, DisasOps *o)
404
static void wout_x1(DisasContext *s, DisasOps *o)
405
{
406
int f1 = get_field(s, r1);
407
+
408
+ tcg_gen_extr_i128_i64(o->out2, o->out, o->out_128);
409
store_freg(f1, o->out);
410
store_freg(f1 + 2, o->out2);
411
}
412
@@ -XXX,XX +XXX,XX @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s)
413
if (o.addr1) {
414
tcg_temp_free_i64(o.addr1);
415
}
416
-
417
+ if (o.out_128) {
418
+ tcg_temp_free_i128(o.out_128);
419
+ }
420
/* io should be the last instruction in tb when icount is enabled */
421
if (unlikely(icount && ret == DISAS_NEXT)) {
422
ret = DISAS_TOO_MANY;
423
--
424
2.34.1
425
426
New patch
1
Acked-by: David Hildenbrand <david@redhat.com>
2
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
v2: Fix SPEC_in1_x1.
6
---
7
target/s390x/helper.h | 32 ++++++------
8
target/s390x/tcg/insn-data.h.inc | 30 +++++------
9
target/s390x/tcg/fpu_helper.c | 88 ++++++++++++++------------------
10
target/s390x/tcg/translate.c | 76 ++++++++++++++++++---------
11
4 files changed, 121 insertions(+), 105 deletions(-)
1
12
13
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/s390x/helper.h
16
+++ b/target/s390x/helper.h
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(csst, i32, env, i32, i64, i64)
18
DEF_HELPER_4(csst_parallel, i32, env, i32, i64, i64)
19
DEF_HELPER_FLAGS_3(aeb, TCG_CALL_NO_WG, i64, env, i64, i64)
20
DEF_HELPER_FLAGS_3(adb, TCG_CALL_NO_WG, i64, env, i64, i64)
21
-DEF_HELPER_FLAGS_5(axb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64)
22
+DEF_HELPER_FLAGS_3(axb, TCG_CALL_NO_WG, i128, env, i128, i128)
23
DEF_HELPER_FLAGS_3(seb, TCG_CALL_NO_WG, i64, env, i64, i64)
24
DEF_HELPER_FLAGS_3(sdb, TCG_CALL_NO_WG, i64, env, i64, i64)
25
-DEF_HELPER_FLAGS_5(sxb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64)
26
+DEF_HELPER_FLAGS_3(sxb, TCG_CALL_NO_WG, i128, env, i128, i128)
27
DEF_HELPER_FLAGS_3(deb, TCG_CALL_NO_WG, i64, env, i64, i64)
28
DEF_HELPER_FLAGS_3(ddb, TCG_CALL_NO_WG, i64, env, i64, i64)
29
-DEF_HELPER_FLAGS_5(dxb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64)
30
+DEF_HELPER_FLAGS_3(dxb, TCG_CALL_NO_WG, i128, env, i128, i128)
31
DEF_HELPER_FLAGS_3(meeb, TCG_CALL_NO_WG, i64, env, i64, i64)
32
DEF_HELPER_FLAGS_3(mdeb, TCG_CALL_NO_WG, i64, env, i64, i64)
33
DEF_HELPER_FLAGS_3(mdb, TCG_CALL_NO_WG, i64, env, i64, i64)
34
-DEF_HELPER_FLAGS_5(mxb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64)
35
-DEF_HELPER_FLAGS_4(mxdb, TCG_CALL_NO_WG, i128, env, i64, i64, i64)
36
+DEF_HELPER_FLAGS_3(mxb, TCG_CALL_NO_WG, i128, env, i128, i128)
37
+DEF_HELPER_FLAGS_3(mxdb, TCG_CALL_NO_WG, i128, env, i128, i64)
38
DEF_HELPER_FLAGS_2(ldeb, TCG_CALL_NO_WG, i64, env, i64)
39
-DEF_HELPER_FLAGS_4(ldxb, TCG_CALL_NO_WG, i64, env, i64, i64, i32)
40
+DEF_HELPER_FLAGS_3(ldxb, TCG_CALL_NO_WG, i64, env, i128, i32)
41
DEF_HELPER_FLAGS_2(lxdb, TCG_CALL_NO_WG, i128, env, i64)
42
DEF_HELPER_FLAGS_2(lxeb, TCG_CALL_NO_WG, i128, env, i64)
43
DEF_HELPER_FLAGS_3(ledb, TCG_CALL_NO_WG, i64, env, i64, i32)
44
-DEF_HELPER_FLAGS_4(lexb, TCG_CALL_NO_WG, i64, env, i64, i64, i32)
45
+DEF_HELPER_FLAGS_3(lexb, TCG_CALL_NO_WG, i64, env, i128, i32)
46
DEF_HELPER_FLAGS_3(ceb, TCG_CALL_NO_WG_SE, i32, env, i64, i64)
47
DEF_HELPER_FLAGS_3(cdb, TCG_CALL_NO_WG_SE, i32, env, i64, i64)
48
-DEF_HELPER_FLAGS_5(cxb, TCG_CALL_NO_WG_SE, i32, env, i64, i64, i64, i64)
49
+DEF_HELPER_FLAGS_3(cxb, TCG_CALL_NO_WG_SE, i32, env, i128, i128)
50
DEF_HELPER_FLAGS_3(keb, TCG_CALL_NO_WG, i32, env, i64, i64)
51
DEF_HELPER_FLAGS_3(kdb, TCG_CALL_NO_WG, i32, env, i64, i64)
52
-DEF_HELPER_FLAGS_5(kxb, TCG_CALL_NO_WG, i32, env, i64, i64, i64, i64)
53
+DEF_HELPER_FLAGS_3(kxb, TCG_CALL_NO_WG, i32, env, i128, i128)
54
DEF_HELPER_3(cgeb, i64, env, i64, i32)
55
DEF_HELPER_3(cgdb, i64, env, i64, i32)
56
-DEF_HELPER_4(cgxb, i64, env, i64, i64, i32)
57
+DEF_HELPER_3(cgxb, i64, env, i128, i32)
58
DEF_HELPER_3(cfeb, i64, env, i64, i32)
59
DEF_HELPER_3(cfdb, i64, env, i64, i32)
60
-DEF_HELPER_4(cfxb, i64, env, i64, i64, i32)
61
+DEF_HELPER_3(cfxb, i64, env, i128, i32)
62
DEF_HELPER_3(clgeb, i64, env, i64, i32)
63
DEF_HELPER_3(clgdb, i64, env, i64, i32)
64
-DEF_HELPER_4(clgxb, i64, env, i64, i64, i32)
65
+DEF_HELPER_3(clgxb, i64, env, i128, i32)
66
DEF_HELPER_3(clfeb, i64, env, i64, i32)
67
DEF_HELPER_3(clfdb, i64, env, i64, i32)
68
-DEF_HELPER_4(clfxb, i64, env, i64, i64, i32)
69
+DEF_HELPER_3(clfxb, i64, env, i128, i32)
70
DEF_HELPER_FLAGS_3(fieb, TCG_CALL_NO_WG, i64, env, i64, i32)
71
DEF_HELPER_FLAGS_3(fidb, TCG_CALL_NO_WG, i64, env, i64, i32)
72
-DEF_HELPER_FLAGS_4(fixb, TCG_CALL_NO_WG, i128, env, i64, i64, i32)
73
+DEF_HELPER_FLAGS_3(fixb, TCG_CALL_NO_WG, i128, env, i128, i32)
74
DEF_HELPER_FLAGS_4(maeb, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
75
DEF_HELPER_FLAGS_4(madb, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
76
DEF_HELPER_FLAGS_4(mseb, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
77
DEF_HELPER_FLAGS_4(msdb, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
78
DEF_HELPER_FLAGS_3(tceb, TCG_CALL_NO_RWG_SE, i32, env, i64, i64)
79
DEF_HELPER_FLAGS_3(tcdb, TCG_CALL_NO_RWG_SE, i32, env, i64, i64)
80
-DEF_HELPER_FLAGS_4(tcxb, TCG_CALL_NO_RWG_SE, i32, env, i64, i64, i64)
81
+DEF_HELPER_FLAGS_3(tcxb, TCG_CALL_NO_RWG_SE, i32, env, i128, i64)
82
DEF_HELPER_FLAGS_2(sqeb, TCG_CALL_NO_WG, i64, env, i64)
83
DEF_HELPER_FLAGS_2(sqdb, TCG_CALL_NO_WG, i64, env, i64)
84
-DEF_HELPER_FLAGS_3(sqxb, TCG_CALL_NO_WG, i128, env, i64, i64)
85
+DEF_HELPER_FLAGS_2(sqxb, TCG_CALL_NO_WG, i128, env, i128)
86
DEF_HELPER_FLAGS_1(cvd, TCG_CALL_NO_RWG_SE, i64, s32)
87
DEF_HELPER_FLAGS_4(pack, TCG_CALL_NO_WG, void, env, i32, i64, i64)
88
DEF_HELPER_FLAGS_4(pka, TCG_CALL_NO_WG, void, env, i64, i64, i32)
89
diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
90
index XXXXXXX..XXXXXXX 100644
91
--- a/target/s390x/tcg/insn-data.h.inc
92
+++ b/target/s390x/tcg/insn-data.h.inc
93
@@ -XXX,XX +XXX,XX @@
94
C(0xe318, AGF, RXY_a, Z, r1, m2_32s, r1, 0, add, adds64)
95
F(0xb30a, AEBR, RRE, Z, e1, e2, new, e1, aeb, f32, IF_BFP)
96
F(0xb31a, ADBR, RRE, Z, f1, f2, new, f1, adb, f64, IF_BFP)
97
- F(0xb34a, AXBR, RRE, Z, x2h, x2l, x1, x1, axb, f128, IF_BFP)
98
+ F(0xb34a, AXBR, RRE, Z, x1, x2, new_x, x1, axb, f128, IF_BFP)
99
F(0xed0a, AEB, RXE, Z, e1, m2_32u, new, e1, aeb, f32, IF_BFP)
100
F(0xed1a, ADB, RXE, Z, f1, m2_64, new, f1, adb, f64, IF_BFP)
101
/* ADD HIGH */
102
@@ -XXX,XX +XXX,XX @@
103
C(0xe330, CGF, RXY_a, Z, r1_o, m2_32s, 0, 0, 0, cmps64)
104
F(0xb309, CEBR, RRE, Z, e1, e2, 0, 0, ceb, 0, IF_BFP)
105
F(0xb319, CDBR, RRE, Z, f1, f2, 0, 0, cdb, 0, IF_BFP)
106
- F(0xb349, CXBR, RRE, Z, x2h, x2l, x1, 0, cxb, 0, IF_BFP)
107
+ F(0xb349, CXBR, RRE, Z, x1, x2, 0, 0, cxb, 0, IF_BFP)
108
F(0xed09, CEB, RXE, Z, e1, m2_32u, 0, 0, ceb, 0, IF_BFP)
109
F(0xed19, CDB, RXE, Z, f1, m2_64, 0, 0, cdb, 0, IF_BFP)
110
/* COMPARE AND SIGNAL */
111
F(0xb308, KEBR, RRE, Z, e1, e2, 0, 0, keb, 0, IF_BFP)
112
F(0xb318, KDBR, RRE, Z, f1, f2, 0, 0, kdb, 0, IF_BFP)
113
- F(0xb348, KXBR, RRE, Z, x2h, x2l, x1, 0, kxb, 0, IF_BFP)
114
+ F(0xb348, KXBR, RRE, Z, x1, x2, 0, 0, kxb, 0, IF_BFP)
115
F(0xed08, KEB, RXE, Z, e1, m2_32u, 0, 0, keb, 0, IF_BFP)
116
F(0xed18, KDB, RXE, Z, f1, m2_64, 0, 0, kdb, 0, IF_BFP)
117
/* COMPARE IMMEDIATE */
118
@@ -XXX,XX +XXX,XX @@
119
/* CONVERT TO FIXED */
120
F(0xb398, CFEBR, RRF_e, Z, 0, e2, new, r1_32, cfeb, 0, IF_BFP)
121
F(0xb399, CFDBR, RRF_e, Z, 0, f2, new, r1_32, cfdb, 0, IF_BFP)
122
- F(0xb39a, CFXBR, RRF_e, Z, x2h, x2l, new, r1_32, cfxb, 0, IF_BFP)
123
+ F(0xb39a, CFXBR, RRF_e, Z, 0, x2, new, r1_32, cfxb, 0, IF_BFP)
124
F(0xb3a8, CGEBR, RRF_e, Z, 0, e2, r1, 0, cgeb, 0, IF_BFP)
125
F(0xb3a9, CGDBR, RRF_e, Z, 0, f2, r1, 0, cgdb, 0, IF_BFP)
126
- F(0xb3aa, CGXBR, RRF_e, Z, x2h, x2l, r1, 0, cgxb, 0, IF_BFP)
127
+ F(0xb3aa, CGXBR, RRF_e, Z, 0, x2, r1, 0, cgxb, 0, IF_BFP)
128
/* CONVERT FROM FIXED */
129
F(0xb394, CEFBR, RRF_e, Z, 0, r2_32s, new, e1, cegb, 0, IF_BFP)
130
F(0xb395, CDFBR, RRF_e, Z, 0, r2_32s, new, f1, cdgb, 0, IF_BFP)
131
@@ -XXX,XX +XXX,XX @@
132
/* CONVERT TO LOGICAL */
133
F(0xb39c, CLFEBR, RRF_e, FPE, 0, e2, new, r1_32, clfeb, 0, IF_BFP)
134
F(0xb39d, CLFDBR, RRF_e, FPE, 0, f2, new, r1_32, clfdb, 0, IF_BFP)
135
- F(0xb39e, CLFXBR, RRF_e, FPE, x2h, x2l, new, r1_32, clfxb, 0, IF_BFP)
136
+ F(0xb39e, CLFXBR, RRF_e, FPE, 0, x2, new, r1_32, clfxb, 0, IF_BFP)
137
F(0xb3ac, CLGEBR, RRF_e, FPE, 0, e2, r1, 0, clgeb, 0, IF_BFP)
138
F(0xb3ad, CLGDBR, RRF_e, FPE, 0, f2, r1, 0, clgdb, 0, IF_BFP)
139
- F(0xb3ae, CLGXBR, RRF_e, FPE, x2h, x2l, r1, 0, clgxb, 0, IF_BFP)
140
+ F(0xb3ae, CLGXBR, RRF_e, FPE, 0, x2, r1, 0, clgxb, 0, IF_BFP)
141
/* CONVERT FROM LOGICAL */
142
F(0xb390, CELFBR, RRF_e, FPE, 0, r2_32u, new, e1, celgb, 0, IF_BFP)
143
F(0xb391, CDLFBR, RRF_e, FPE, 0, r2_32u, new, f1, cdlgb, 0, IF_BFP)
144
@@ -XXX,XX +XXX,XX @@
145
C(0x5d00, D, RX_a, Z, r1_D32, m2_32s, new_P, r1_P32, divs32, 0)
146
F(0xb30d, DEBR, RRE, Z, e1, e2, new, e1, deb, 0, IF_BFP)
147
F(0xb31d, DDBR, RRE, Z, f1, f2, new, f1, ddb, 0, IF_BFP)
148
- F(0xb34d, DXBR, RRE, Z, x2h, x2l, x1, x1, dxb, 0, IF_BFP)
149
+ F(0xb34d, DXBR, RRE, Z, x1, x2, new_x, x1, dxb, 0, IF_BFP)
150
F(0xed0d, DEB, RXE, Z, e1, m2_32u, new, e1, deb, 0, IF_BFP)
151
F(0xed1d, DDB, RXE, Z, f1, m2_64, new, f1, ddb, 0, IF_BFP)
152
/* DIVIDE LOGICAL */
153
@@ -XXX,XX +XXX,XX @@
154
/* LOAD FP INTEGER */
155
F(0xb357, FIEBR, RRF_e, Z, 0, e2, new, e1, fieb, 0, IF_BFP)
156
F(0xb35f, FIDBR, RRF_e, Z, 0, f2, new, f1, fidb, 0, IF_BFP)
157
- F(0xb347, FIXBR, RRF_e, Z, x2h, x2l, new_x, x1, fixb, 0, IF_BFP)
158
+ F(0xb347, FIXBR, RRF_e, Z, 0, x2, new_x, x1, fixb, 0, IF_BFP)
159
160
/* LOAD LENGTHENED */
161
F(0xb304, LDEBR, RRE, Z, 0, e2, new, f1, ldeb, 0, IF_BFP)
162
@@ -XXX,XX +XXX,XX @@
163
F(0xed24, LDE, RXE, Z, 0, m2_32u, new, f1, lde, 0, IF_AFP1)
164
/* LOAD ROUNDED */
165
F(0xb344, LEDBR, RRF_e, Z, 0, f2, new, e1, ledb, 0, IF_BFP)
166
- F(0xb345, LDXBR, RRF_e, Z, x2h, x2l, new, f1, ldxb, 0, IF_BFP)
167
- F(0xb346, LEXBR, RRF_e, Z, x2h, x2l, new, e1, lexb, 0, IF_BFP)
168
+ F(0xb345, LDXBR, RRF_e, Z, 0, x2, new, f1, ldxb, 0, IF_BFP)
169
+ F(0xb346, LEXBR, RRF_e, Z, 0, x2, new, e1, lexb, 0, IF_BFP)
170
171
/* LOAD MULTIPLE */
172
C(0x9800, LM, RS_a, Z, 0, a2, 0, 0, lm32, 0)
173
@@ -XXX,XX +XXX,XX @@
174
C(0xe384, MG, RXY_a, MIE2,r1p1_o, m2_64, r1_P, 0, muls128, 0)
175
F(0xb317, MEEBR, RRE, Z, e1, e2, new, e1, meeb, 0, IF_BFP)
176
F(0xb31c, MDBR, RRE, Z, f1, f2, new, f1, mdb, 0, IF_BFP)
177
- F(0xb34c, MXBR, RRE, Z, x2h, x2l, x1, x1, mxb, 0, IF_BFP)
178
+ F(0xb34c, MXBR, RRE, Z, x1, x2, new_x, x1, mxb, 0, IF_BFP)
179
F(0xb30c, MDEBR, RRE, Z, f1, e2, new, f1, mdeb, 0, IF_BFP)
180
F(0xb307, MXDBR, RRE, Z, 0, f2, x1, x1, mxdb, 0, IF_BFP)
181
F(0xed17, MEEB, RXE, Z, e1, m2_32u, new, e1, meeb, 0, IF_BFP)
182
@@ -XXX,XX +XXX,XX @@
183
/* SQUARE ROOT */
184
F(0xb314, SQEBR, RRE, Z, 0, e2, new, e1, sqeb, 0, IF_BFP)
185
F(0xb315, SQDBR, RRE, Z, 0, f2, new, f1, sqdb, 0, IF_BFP)
186
- F(0xb316, SQXBR, RRE, Z, x2h, x2l, new_x, x1, sqxb, 0, IF_BFP)
187
+ F(0xb316, SQXBR, RRE, Z, 0, x2, new_x, x1, sqxb, 0, IF_BFP)
188
F(0xed14, SQEB, RXE, Z, 0, m2_32u, new, e1, sqeb, 0, IF_BFP)
189
F(0xed15, SQDB, RXE, Z, 0, m2_64, new, f1, sqdb, 0, IF_BFP)
190
191
@@ -XXX,XX +XXX,XX @@
192
C(0xe319, SGF, RXY_a, Z, r1, m2_32s, r1, 0, sub, subs64)
193
F(0xb30b, SEBR, RRE, Z, e1, e2, new, e1, seb, f32, IF_BFP)
194
F(0xb31b, SDBR, RRE, Z, f1, f2, new, f1, sdb, f64, IF_BFP)
195
- F(0xb34b, SXBR, RRE, Z, x2h, x2l, x1, x1, sxb, f128, IF_BFP)
196
+ F(0xb34b, SXBR, RRE, Z, x1, x2, new_x, x1, sxb, f128, IF_BFP)
197
F(0xed0b, SEB, RXE, Z, e1, m2_32u, new, e1, seb, f32, IF_BFP)
198
F(0xed1b, SDB, RXE, Z, f1, m2_64, new, f1, sdb, f64, IF_BFP)
199
/* SUBTRACT HALFWORD */
200
@@ -XXX,XX +XXX,XX @@
201
/* TEST DATA CLASS */
202
F(0xed10, TCEB, RXE, Z, e1, a2, 0, 0, tceb, 0, IF_BFP)
203
F(0xed11, TCDB, RXE, Z, f1, a2, 0, 0, tcdb, 0, IF_BFP)
204
- F(0xed12, TCXB, RXE, Z, 0, a2, x1, 0, tcxb, 0, IF_BFP)
205
+ F(0xed12, TCXB, RXE, Z, x1, a2, 0, 0, tcxb, 0, IF_BFP)
206
207
/* TEST DECIMAL */
208
C(0xebc0, TP, RSL, E2, la1, 0, 0, 0, tp, 0)
209
diff --git a/target/s390x/tcg/fpu_helper.c b/target/s390x/tcg/fpu_helper.c
210
index XXXXXXX..XXXXXXX 100644
211
--- a/target/s390x/tcg/fpu_helper.c
212
+++ b/target/s390x/tcg/fpu_helper.c
213
@@ -XXX,XX +XXX,XX @@ static inline Int128 RET128(float128 f)
214
return int128_make128(f.low, f.high);
215
}
216
217
+static inline float128 ARG128(Int128 i)
218
+{
219
+ return make_float128(int128_gethi(i), int128_getlo(i));
220
+}
221
+
222
uint8_t s390_softfloat_exc_to_ieee(unsigned int exc)
223
{
224
uint8_t s390_exc = 0;
225
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(adb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
226
}
227
228
/* 128-bit FP addition */
229
-Int128 HELPER(axb)(CPUS390XState *env, uint64_t ah, uint64_t al,
230
- uint64_t bh, uint64_t bl)
231
+Int128 HELPER(axb)(CPUS390XState *env, Int128 a, Int128 b)
232
{
233
- float128 ret = float128_add(make_float128(ah, al),
234
- make_float128(bh, bl),
235
- &env->fpu_status);
236
+ float128 ret = float128_add(ARG128(a), ARG128(b), &env->fpu_status);
237
handle_exceptions(env, false, GETPC());
238
return RET128(ret);
239
}
240
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(sdb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
241
}
242
243
/* 128-bit FP subtraction */
244
-Int128 HELPER(sxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
245
- uint64_t bh, uint64_t bl)
246
+Int128 HELPER(sxb)(CPUS390XState *env, Int128 a, Int128 b)
247
{
248
- float128 ret = float128_sub(make_float128(ah, al),
249
- make_float128(bh, bl),
250
- &env->fpu_status);
251
+ float128 ret = float128_sub(ARG128(a), ARG128(b), &env->fpu_status);
252
handle_exceptions(env, false, GETPC());
253
return RET128(ret);
254
}
255
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(ddb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
256
}
257
258
/* 128-bit FP division */
259
-Int128 HELPER(dxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
260
- uint64_t bh, uint64_t bl)
261
+Int128 HELPER(dxb)(CPUS390XState *env, Int128 a, Int128 b)
262
{
263
- float128 ret = float128_div(make_float128(ah, al),
264
- make_float128(bh, bl),
265
- &env->fpu_status);
266
+ float128 ret = float128_div(ARG128(a), ARG128(b), &env->fpu_status);
267
handle_exceptions(env, false, GETPC());
268
return RET128(ret);
269
}
270
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(mdeb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
271
}
272
273
/* 128-bit FP multiplication */
274
-Int128 HELPER(mxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
275
- uint64_t bh, uint64_t bl)
276
+Int128 HELPER(mxb)(CPUS390XState *env, Int128 a, Int128 b)
277
{
278
- float128 ret = float128_mul(make_float128(ah, al),
279
- make_float128(bh, bl),
280
- &env->fpu_status);
281
+ float128 ret = float128_mul(ARG128(a), ARG128(b), &env->fpu_status);
282
handle_exceptions(env, false, GETPC());
283
return RET128(ret);
284
}
285
286
/* 128/64-bit FP multiplication */
287
-Int128 HELPER(mxdb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t f2)
288
+Int128 HELPER(mxdb)(CPUS390XState *env, Int128 a, uint64_t f2)
289
{
290
float128 ret = float64_to_float128(f2, &env->fpu_status);
291
- ret = float128_mul(make_float128(ah, al), ret, &env->fpu_status);
292
+ ret = float128_mul(ARG128(a), ret, &env->fpu_status);
293
handle_exceptions(env, false, GETPC());
294
return RET128(ret);
295
}
296
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(ldeb)(CPUS390XState *env, uint64_t f2)
297
}
298
299
/* convert 128-bit float to 64-bit float */
300
-uint64_t HELPER(ldxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
301
- uint32_t m34)
302
+uint64_t HELPER(ldxb)(CPUS390XState *env, Int128 a, uint32_t m34)
303
{
304
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
305
- float64 ret = float128_to_float64(make_float128(ah, al), &env->fpu_status);
306
+ float64 ret = float128_to_float64(ARG128(a), &env->fpu_status);
307
308
s390_restore_bfp_rounding_mode(env, old_mode);
309
handle_exceptions(env, xxc_from_m34(m34), GETPC());
310
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(ledb)(CPUS390XState *env, uint64_t f2, uint32_t m34)
311
}
312
313
/* convert 128-bit float to 32-bit float */
314
-uint64_t HELPER(lexb)(CPUS390XState *env, uint64_t ah, uint64_t al,
315
- uint32_t m34)
316
+uint64_t HELPER(lexb)(CPUS390XState *env, Int128 a, uint32_t m34)
317
{
318
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
319
- float32 ret = float128_to_float32(make_float128(ah, al), &env->fpu_status);
320
+ float32 ret = float128_to_float32(ARG128(a), &env->fpu_status);
321
322
s390_restore_bfp_rounding_mode(env, old_mode);
323
handle_exceptions(env, xxc_from_m34(m34), GETPC());
324
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(cdb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
325
}
326
327
/* 128-bit FP compare */
328
-uint32_t HELPER(cxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
329
- uint64_t bh, uint64_t bl)
330
+uint32_t HELPER(cxb)(CPUS390XState *env, Int128 a, Int128 b)
331
{
332
- FloatRelation cmp = float128_compare_quiet(make_float128(ah, al),
333
- make_float128(bh, bl),
334
+ FloatRelation cmp = float128_compare_quiet(ARG128(a), ARG128(b),
335
&env->fpu_status);
336
handle_exceptions(env, false, GETPC());
337
return float_comp_to_cc(env, cmp);
338
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(cgdb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
339
}
340
341
/* convert 128-bit float to 64-bit int */
342
-uint64_t HELPER(cgxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34)
343
+uint64_t HELPER(cgxb)(CPUS390XState *env, Int128 i2, uint32_t m34)
344
{
345
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
346
- float128 v2 = make_float128(h, l);
347
+ float128 v2 = ARG128(i2);
348
int64_t ret = float128_to_int64(v2, &env->fpu_status);
349
uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status);
350
351
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(cfdb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
352
}
353
354
/* convert 128-bit float to 32-bit int */
355
-uint64_t HELPER(cfxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34)
356
+uint64_t HELPER(cfxb)(CPUS390XState *env, Int128 i2, uint32_t m34)
357
{
358
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
359
- float128 v2 = make_float128(h, l);
360
+ float128 v2 = ARG128(i2);
361
int32_t ret = float128_to_int32(v2, &env->fpu_status);
362
uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status);
363
364
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(clgdb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
365
}
366
367
/* convert 128-bit float to 64-bit uint */
368
-uint64_t HELPER(clgxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34)
369
+uint64_t HELPER(clgxb)(CPUS390XState *env, Int128 i2, uint32_t m34)
370
{
371
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
372
- float128 v2 = make_float128(h, l);
373
+ float128 v2 = ARG128(i2);
374
uint64_t ret = float128_to_uint64(v2, &env->fpu_status);
375
uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status);
376
377
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(clfdb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
378
}
379
380
/* convert 128-bit float to 32-bit uint */
381
-uint64_t HELPER(clfxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34)
382
+uint64_t HELPER(clfxb)(CPUS390XState *env, Int128 i2, uint32_t m34)
383
{
384
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
385
- float128 v2 = make_float128(h, l);
386
+ float128 v2 = ARG128(i2);
387
uint32_t ret = float128_to_uint32(v2, &env->fpu_status);
388
uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status);
389
390
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(fidb)(CPUS390XState *env, uint64_t f2, uint32_t m34)
391
}
392
393
/* round to integer 128-bit */
394
-Int128 HELPER(fixb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint32_t m34)
395
+Int128 HELPER(fixb)(CPUS390XState *env, Int128 a, uint32_t m34)
396
{
397
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
398
- float128 ret = float128_round_to_int(make_float128(ah, al),
399
- &env->fpu_status);
400
+ float128 ret = float128_round_to_int(ARG128(a), &env->fpu_status);
401
402
s390_restore_bfp_rounding_mode(env, old_mode);
403
handle_exceptions(env, xxc_from_m34(m34), GETPC());
404
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(kdb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
405
}
406
407
/* 128-bit FP compare and signal */
408
-uint32_t HELPER(kxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
409
- uint64_t bh, uint64_t bl)
410
+uint32_t HELPER(kxb)(CPUS390XState *env, Int128 a, Int128 b)
411
{
412
- FloatRelation cmp = float128_compare(make_float128(ah, al),
413
- make_float128(bh, bl),
414
+ FloatRelation cmp = float128_compare(ARG128(a), ARG128(b),
415
&env->fpu_status);
416
handle_exceptions(env, false, GETPC());
417
return float_comp_to_cc(env, cmp);
418
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(tcdb)(CPUS390XState *env, uint64_t v1, uint64_t m2)
419
}
420
421
/* test data class 128-bit */
422
-uint32_t HELPER(tcxb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t m2)
423
+uint32_t HELPER(tcxb)(CPUS390XState *env, Int128 a, uint64_t m2)
424
{
425
- return (m2 & float128_dcmask(env, make_float128(ah, al))) != 0;
426
+ return (m2 & float128_dcmask(env, ARG128(a))) != 0;
427
}
428
429
/* square root 32-bit */
430
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(sqdb)(CPUS390XState *env, uint64_t f2)
431
}
432
433
/* square root 128-bit */
434
-Int128 HELPER(sqxb)(CPUS390XState *env, uint64_t ah, uint64_t al)
435
+Int128 HELPER(sqxb)(CPUS390XState *env, Int128 a)
436
{
437
- float128 ret = float128_sqrt(make_float128(ah, al), &env->fpu_status);
438
+ float128 ret = float128_sqrt(ARG128(a), &env->fpu_status);
439
handle_exceptions(env, false, GETPC());
440
return RET128(ret);
441
}
442
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
443
index XXXXXXX..XXXXXXX 100644
444
--- a/target/s390x/tcg/translate.c
445
+++ b/target/s390x/tcg/translate.c
446
@@ -XXX,XX +XXX,XX @@ static TCGv_i64 load_freg32_i64(int reg)
447
return r;
448
}
449
450
+static TCGv_i128 load_freg_128(int reg)
451
+{
452
+ TCGv_i64 h = load_freg(reg);
453
+ TCGv_i64 l = load_freg(reg + 2);
454
+ TCGv_i128 r = tcg_temp_new_i128();
455
+
456
+ tcg_gen_concat_i64_i128(r, l, h);
457
+ tcg_temp_free_i64(h);
458
+ tcg_temp_free_i64(l);
459
+ return r;
460
+}
461
+
462
static void store_reg(int reg, TCGv_i64 v)
463
{
464
tcg_gen_mov_i64(regs[reg], v);
465
@@ -XXX,XX +XXX,XX @@ typedef struct {
466
bool g_out, g_out2, g_in1, g_in2;
467
TCGv_i64 out, out2, in1, in2;
468
TCGv_i64 addr1;
469
- TCGv_i128 out_128;
470
+ TCGv_i128 out_128, in1_128, in2_128;
471
} DisasOps;
472
473
/* Instructions can place constraints on their operands, raising specification
474
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_adb(DisasContext *s, DisasOps *o)
475
476
static DisasJumpType op_axb(DisasContext *s, DisasOps *o)
477
{
478
- gen_helper_axb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2);
479
+ gen_helper_axb(o->out_128, cpu_env, o->in1_128, o->in2_128);
480
return DISAS_NEXT;
481
}
482
483
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cdb(DisasContext *s, DisasOps *o)
484
485
static DisasJumpType op_cxb(DisasContext *s, DisasOps *o)
486
{
487
- gen_helper_cxb(cc_op, cpu_env, o->out, o->out2, o->in1, o->in2);
488
+ gen_helper_cxb(cc_op, cpu_env, o->in1_128, o->in2_128);
489
set_cc_static(s);
490
return DISAS_NEXT;
491
}
492
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cfxb(DisasContext *s, DisasOps *o)
493
if (!m34) {
494
return DISAS_NORETURN;
495
}
496
- gen_helper_cfxb(o->out, cpu_env, o->in1, o->in2, m34);
497
+ gen_helper_cfxb(o->out, cpu_env, o->in2_128, m34);
498
tcg_temp_free_i32(m34);
499
set_cc_static(s);
500
return DISAS_NEXT;
501
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cgxb(DisasContext *s, DisasOps *o)
502
if (!m34) {
503
return DISAS_NORETURN;
504
}
505
- gen_helper_cgxb(o->out, cpu_env, o->in1, o->in2, m34);
506
+ gen_helper_cgxb(o->out, cpu_env, o->in2_128, m34);
507
tcg_temp_free_i32(m34);
508
set_cc_static(s);
509
return DISAS_NEXT;
510
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_clfxb(DisasContext *s, DisasOps *o)
511
if (!m34) {
512
return DISAS_NORETURN;
513
}
514
- gen_helper_clfxb(o->out, cpu_env, o->in1, o->in2, m34);
515
+ gen_helper_clfxb(o->out, cpu_env, o->in2_128, m34);
516
tcg_temp_free_i32(m34);
517
set_cc_static(s);
518
return DISAS_NEXT;
519
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_clgxb(DisasContext *s, DisasOps *o)
520
if (!m34) {
521
return DISAS_NORETURN;
522
}
523
- gen_helper_clgxb(o->out, cpu_env, o->in1, o->in2, m34);
524
+ gen_helper_clgxb(o->out, cpu_env, o->in2_128, m34);
525
tcg_temp_free_i32(m34);
526
set_cc_static(s);
527
return DISAS_NEXT;
528
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_ddb(DisasContext *s, DisasOps *o)
529
530
static DisasJumpType op_dxb(DisasContext *s, DisasOps *o)
531
{
532
- gen_helper_dxb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2);
533
+ gen_helper_dxb(o->out_128, cpu_env, o->in1_128, o->in2_128);
534
return DISAS_NEXT;
535
}
536
537
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_fixb(DisasContext *s, DisasOps *o)
538
if (!m34) {
539
return DISAS_NORETURN;
540
}
541
- gen_helper_fixb(o->out_128, cpu_env, o->in1, o->in2, m34);
542
+ gen_helper_fixb(o->out_128, cpu_env, o->in2_128, m34);
543
tcg_temp_free_i32(m34);
544
return DISAS_NEXT;
545
}
546
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_kdb(DisasContext *s, DisasOps *o)
547
548
static DisasJumpType op_kxb(DisasContext *s, DisasOps *o)
549
{
550
- gen_helper_kxb(cc_op, cpu_env, o->out, o->out2, o->in1, o->in2);
551
+ gen_helper_kxb(cc_op, cpu_env, o->in1_128, o->in2_128);
552
set_cc_static(s);
553
return DISAS_NEXT;
554
}
555
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_ldxb(DisasContext *s, DisasOps *o)
556
if (!m34) {
557
return DISAS_NORETURN;
558
}
559
- gen_helper_ldxb(o->out, cpu_env, o->in1, o->in2, m34);
560
+ gen_helper_ldxb(o->out, cpu_env, o->in2_128, m34);
561
tcg_temp_free_i32(m34);
562
return DISAS_NEXT;
563
}
564
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lexb(DisasContext *s, DisasOps *o)
565
if (!m34) {
566
return DISAS_NORETURN;
567
}
568
- gen_helper_lexb(o->out, cpu_env, o->in1, o->in2, m34);
569
+ gen_helper_lexb(o->out, cpu_env, o->in2_128, m34);
570
tcg_temp_free_i32(m34);
571
return DISAS_NEXT;
572
}
573
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_mdb(DisasContext *s, DisasOps *o)
574
575
static DisasJumpType op_mxb(DisasContext *s, DisasOps *o)
576
{
577
- gen_helper_mxb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2);
578
+ gen_helper_mxb(o->out_128, cpu_env, o->in1_128, o->in2_128);
579
return DISAS_NEXT;
580
}
581
582
static DisasJumpType op_mxdb(DisasContext *s, DisasOps *o)
583
{
584
- gen_helper_mxdb(o->out_128, cpu_env, o->out, o->out2, o->in2);
585
+ gen_helper_mxdb(o->out_128, cpu_env, o->in1_128, o->in2);
586
return DISAS_NEXT;
587
}
588
589
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_sdb(DisasContext *s, DisasOps *o)
590
591
static DisasJumpType op_sxb(DisasContext *s, DisasOps *o)
592
{
593
- gen_helper_sxb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2);
594
+ gen_helper_sxb(o->out_128, cpu_env, o->in1_128, o->in2_128);
595
return DISAS_NEXT;
596
}
597
598
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_sqdb(DisasContext *s, DisasOps *o)
599
600
static DisasJumpType op_sqxb(DisasContext *s, DisasOps *o)
601
{
602
- gen_helper_sqxb(o->out_128, cpu_env, o->in1, o->in2);
603
+ gen_helper_sqxb(o->out_128, cpu_env, o->in2_128);
604
return DISAS_NEXT;
605
}
606
607
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_tcdb(DisasContext *s, DisasOps *o)
608
609
static DisasJumpType op_tcxb(DisasContext *s, DisasOps *o)
610
{
611
- gen_helper_tcxb(cc_op, cpu_env, o->out, o->out2, o->in2);
612
+ gen_helper_tcxb(cc_op, cpu_env, o->in1_128, o->in2);
613
set_cc_static(s);
614
return DISAS_NEXT;
615
}
616
@@ -XXX,XX +XXX,XX @@ static void prep_new_P(DisasContext *s, DisasOps *o)
617
618
static void prep_new_x(DisasContext *s, DisasOps *o)
619
{
620
- o->out = tcg_temp_new_i64();
621
- o->out2 = tcg_temp_new_i64();
622
o->out_128 = tcg_temp_new_i128();
623
}
624
#define SPEC_prep_new_x 0
625
@@ -XXX,XX +XXX,XX @@ static void prep_r1_P(DisasContext *s, DisasOps *o)
626
627
static void prep_x1(DisasContext *s, DisasOps *o)
628
{
629
- o->out = load_freg(get_field(s, r1));
630
- o->out2 = load_freg(get_field(s, r1) + 2);
631
- o->out_128 = tcg_temp_new_i128();
632
- tcg_gen_concat_i64_i128(o->out_128, o->out2, o->out);
633
+ o->out_128 = load_freg_128(get_field(s, r1));
634
}
635
#define SPEC_prep_x1 SPEC_r1_f128
636
637
@@ -XXX,XX +XXX,XX @@ static void wout_x1(DisasContext *s, DisasOps *o)
638
{
639
int f1 = get_field(s, r1);
640
641
+ /* Split out_128 into out+out2 for cout_f128. */
642
+ tcg_debug_assert(o->out == NULL);
643
+ o->out = tcg_temp_new_i64();
644
+ o->out2 = tcg_temp_new_i64();
645
+
646
tcg_gen_extr_i128_i64(o->out2, o->out, o->out_128);
647
store_freg(f1, o->out);
648
store_freg(f1 + 2, o->out2);
649
@@ -XXX,XX +XXX,XX @@ static void in1_f1(DisasContext *s, DisasOps *o)
650
}
651
#define SPEC_in1_f1 0
652
653
+static void in1_x1(DisasContext *s, DisasOps *o)
654
+{
655
+ o->in1_128 = load_freg_128(get_field(s, r1));
656
+}
657
+#define SPEC_in1_x1 SPEC_r1_f128
658
+
659
/* Load the high double word of an extended (128-bit) format FP number */
660
static void in1_x2h(DisasContext *s, DisasOps *o)
661
{
662
@@ -XXX,XX +XXX,XX @@ static void in2_f2(DisasContext *s, DisasOps *o)
663
}
664
#define SPEC_in2_f2 0
665
666
+static void in2_x2(DisasContext *s, DisasOps *o)
667
+{
668
+ o->in2_128 = load_freg_128(get_field(s, r2));
669
+}
670
+#define SPEC_in2_x2 SPEC_r2_f128
671
+
672
/* Load the low double word of an extended (128-bit) format FP number */
673
static void in2_x2l(DisasContext *s, DisasOps *o)
674
{
675
@@ -XXX,XX +XXX,XX @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s)
676
if (o.out_128) {
677
tcg_temp_free_i128(o.out_128);
678
}
679
+ if (o.in1_128) {
680
+ tcg_temp_free_i128(o.in1_128);
681
+ }
682
+ if (o.in2_128) {
683
+ tcg_temp_free_i128(o.in2_128);
684
+ }
685
/* io should be the last instruction in tb when icount is enabled */
686
if (unlikely(icount && ret == DISAS_NEXT)) {
687
ret = DISAS_TOO_MANY;
688
--
689
2.34.1
690
691
New patch
1
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
target/s390x/helper.h | 2 --
5
target/s390x/tcg/insn-data.h.inc | 2 +-
6
target/s390x/tcg/mem_helper.c | 52 ------------------------------
7
target/s390x/tcg/translate.c | 55 +++++++++++++++++++-------------
8
4 files changed, 33 insertions(+), 78 deletions(-)
1
9
10
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/s390x/helper.h
13
+++ b/target/s390x/helper.h
14
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(cxgb, i128, env, s64, i32)
15
DEF_HELPER_3(celgb, i64, env, i64, i32)
16
DEF_HELPER_3(cdlgb, i64, env, i64, i32)
17
DEF_HELPER_3(cxlgb, i128, env, i64, i32)
18
-DEF_HELPER_4(cdsg, void, env, i64, i32, i32)
19
-DEF_HELPER_4(cdsg_parallel, void, env, i64, i32, i32)
20
DEF_HELPER_4(csst, i32, env, i32, i64, i64)
21
DEF_HELPER_4(csst_parallel, i32, env, i32, i64, i64)
22
DEF_HELPER_FLAGS_3(aeb, TCG_CALL_NO_WG, i64, env, i64, i64)
23
diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
24
index XXXXXXX..XXXXXXX 100644
25
--- a/target/s390x/tcg/insn-data.h.inc
26
+++ b/target/s390x/tcg/insn-data.h.inc
27
@@ -XXX,XX +XXX,XX @@
28
/* COMPARE DOUBLE AND SWAP */
29
D(0xbb00, CDS, RS_a, Z, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEUQ)
30
D(0xeb31, CDSY, RSY_a, LD, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEUQ)
31
- C(0xeb3e, CDSG, RSY_a, Z, 0, 0, 0, 0, cdsg, 0)
32
+ C(0xeb3e, CDSG, RSY_a, Z, la2, r3_D64, 0, r1_D64, cdsg, 0)
33
/* COMPARE AND SWAP AND STORE */
34
C(0xc802, CSST, SSF, CASS, la1, a2, 0, 0, csst, 0)
35
36
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/s390x/tcg/mem_helper.c
39
+++ b/target/s390x/tcg/mem_helper.c
40
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
41
return cc;
42
}
43
44
-void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
45
- uint32_t r1, uint32_t r3)
46
-{
47
- uintptr_t ra = GETPC();
48
- Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
49
- Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
50
- Int128 oldv;
51
- uint64_t oldh, oldl;
52
- bool fail;
53
-
54
- check_alignment(env, addr, 16, ra);
55
-
56
- oldh = cpu_ldq_data_ra(env, addr + 0, ra);
57
- oldl = cpu_ldq_data_ra(env, addr + 8, ra);
58
-
59
- oldv = int128_make128(oldl, oldh);
60
- fail = !int128_eq(oldv, cmpv);
61
- if (fail) {
62
- newv = oldv;
63
- }
64
-
65
- cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
66
- cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
67
-
68
- env->cc_op = fail;
69
- env->regs[r1] = int128_gethi(oldv);
70
- env->regs[r1 + 1] = int128_getlo(oldv);
71
-}
72
-
73
-void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
74
- uint32_t r1, uint32_t r3)
75
-{
76
- uintptr_t ra = GETPC();
77
- Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
78
- Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
79
- int mem_idx;
80
- MemOpIdx oi;
81
- Int128 oldv;
82
- bool fail;
83
-
84
- assert(HAVE_CMPXCHG128);
85
-
86
- mem_idx = cpu_mmu_index(env, false);
87
- oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
88
- oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
89
- fail = !int128_eq(oldv, cmpv);
90
-
91
- env->cc_op = fail;
92
- env->regs[r1] = int128_gethi(oldv);
93
- env->regs[r1 + 1] = int128_getlo(oldv);
94
-}
95
-
96
static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
97
uint64_t a2, bool parallel)
98
{
99
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
100
index XXXXXXX..XXXXXXX 100644
101
--- a/target/s390x/tcg/translate.c
102
+++ b/target/s390x/tcg/translate.c
103
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cs(DisasContext *s, DisasOps *o)
104
static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o)
105
{
106
int r1 = get_field(s, r1);
107
- int r3 = get_field(s, r3);
108
- int d2 = get_field(s, d2);
109
- int b2 = get_field(s, b2);
110
- DisasJumpType ret = DISAS_NEXT;
111
- TCGv_i64 addr;
112
- TCGv_i32 t_r1, t_r3;
113
114
- /* Note that R1:R1+1 = expected value and R3:R3+1 = new value. */
115
- addr = get_address(s, 0, b2, d2);
116
- t_r1 = tcg_const_i32(r1);
117
- t_r3 = tcg_const_i32(r3);
118
- if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
119
- gen_helper_cdsg(cpu_env, addr, t_r1, t_r3);
120
- } else if (HAVE_CMPXCHG128) {
121
- gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3);
122
- } else {
123
- gen_helper_exit_atomic(cpu_env);
124
- ret = DISAS_NORETURN;
125
- }
126
- tcg_temp_free_i64(addr);
127
- tcg_temp_free_i32(t_r1);
128
- tcg_temp_free_i32(t_r3);
129
+ o->out_128 = tcg_temp_new_i128();
130
+ tcg_gen_concat_i64_i128(o->out_128, regs[r1 + 1], regs[r1]);
131
132
- set_cc_static(s);
133
- return ret;
134
+ /* Note out (R1:R1+1) = expected value and in2 (R3:R3+1) = new value. */
135
+ tcg_gen_atomic_cmpxchg_i128(o->out_128, o->addr1, o->out_128, o->in2_128,
136
+ get_mem_index(s), MO_BE | MO_128 | MO_ALIGN);
137
+
138
+ /*
139
+ * Extract result into cc_dst:cc_src, compare vs the expected value
140
+ * in the as yet unmodified input registers, then update CC_OP.
141
+ */
142
+ tcg_gen_extr_i128_i64(cc_src, cc_dst, o->out_128);
143
+ tcg_gen_xor_i64(cc_dst, cc_dst, regs[r1]);
144
+ tcg_gen_xor_i64(cc_src, cc_src, regs[r1 + 1]);
145
+ tcg_gen_or_i64(cc_dst, cc_dst, cc_src);
146
+ set_cc_nz_u64(s, cc_dst);
147
+
148
+ return DISAS_NEXT;
149
}
150
151
static DisasJumpType op_csst(DisasContext *s, DisasOps *o)
152
@@ -XXX,XX +XXX,XX @@ static void wout_r1_D32(DisasContext *s, DisasOps *o)
153
}
154
#define SPEC_wout_r1_D32 SPEC_r1_even
155
156
+static void wout_r1_D64(DisasContext *s, DisasOps *o)
157
+{
158
+ int r1 = get_field(s, r1);
159
+ tcg_gen_extr_i128_i64(regs[r1 + 1], regs[r1], o->out_128);
160
+}
161
+#define SPEC_wout_r1_D64 SPEC_r1_even
162
+
163
static void wout_r3_P32(DisasContext *s, DisasOps *o)
164
{
165
int r3 = get_field(s, r3);
166
@@ -XXX,XX +XXX,XX @@ static void in2_r3(DisasContext *s, DisasOps *o)
167
}
168
#define SPEC_in2_r3 0
169
170
+static void in2_r3_D64(DisasContext *s, DisasOps *o)
171
+{
172
+ int r3 = get_field(s, r3);
173
+ o->in2_128 = tcg_temp_new_i128();
174
+ tcg_gen_concat_i64_i128(o->in2_128, regs[r3 + 1], regs[r3]);
175
+}
176
+#define SPEC_in2_r3_D64 SPEC_r3_even
177
+
178
static void in2_r3_sr32(DisasContext *s, DisasOps *o)
179
{
180
o->in2 = tcg_temp_new_i64();
181
--
182
2.34.1
New patch
1
This case is trivial to implement inline.
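For reference, CC_OP_NZ is just a non-zero test on cc_dst, so the inline
expansion below is a single setcond plus a truncation. A minimal sketch of
the semantics being open-coded (plain C; the helper name follows the
cc_calc_* convention in cc_helper.c and is an assumption, not a quote from
the tree):

    /* CC_OP_NZ: the condition code is 1 iff cc_dst is non-zero (sketch). */
    static uint32_t cc_calc_nz(uint64_t dst)
    {
        return dst != 0;
    }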
1
2
3
Reviewed-by: David Hildenbrand <david@redhat.com>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
target/s390x/tcg/translate.c | 3 +++
7
1 file changed, 3 insertions(+)
8
9
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/target/s390x/tcg/translate.c
12
+++ b/target/s390x/tcg/translate.c
13
@@ -XXX,XX +XXX,XX @@ static void gen_op_calc_cc(DisasContext *s)
14
/* env->cc_op already is the cc value */
15
break;
16
case CC_OP_NZ:
17
+ tcg_gen_setcondi_i64(TCG_COND_NE, cc_dst, cc_dst, 0);
18
+ tcg_gen_extrl_i64_i32(cc_op, cc_dst);
19
+ break;
20
case CC_OP_ABS_64:
21
case CC_OP_NABS_64:
22
case CC_OP_ABS_32:
23
--
24
2.34.1
diff view generated by jsdifflib
New patch
1
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
target/i386/tcg/translate.c | 48 ++++++++++++++++++++++++-------------
5
1 file changed, 31 insertions(+), 17 deletions(-)
1
6
7
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/target/i386/tcg/translate.c
10
+++ b/target/i386/tcg/translate.c
11
@@ -XXX,XX +XXX,XX @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
12
#include "emit.c.inc"
13
#include "decode-new.c.inc"
14
15
+static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm)
16
+{
17
+ gen_lea_modrm(env, s, modrm);
18
+
19
+ if ((s->prefix & PREFIX_LOCK) &&
20
+ (tb_cflags(s->base.tb) & CF_PARALLEL)) {
21
+ gen_helper_cmpxchg8b(cpu_env, s->A0);
22
+ } else {
23
+ gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
24
+ }
25
+ set_cc_op(s, CC_OP_EFLAGS);
26
+}
27
+
28
+#ifdef TARGET_X86_64
29
+static void gen_cmpxchg16b(DisasContext *s, CPUX86State *env, int modrm)
30
+{
31
+ gen_lea_modrm(env, s, modrm);
32
+
33
+ if ((s->prefix & PREFIX_LOCK) &&
34
+ (tb_cflags(s->base.tb) & CF_PARALLEL)) {
35
+ gen_helper_cmpxchg16b(cpu_env, s->A0);
36
+ } else {
37
+ gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
38
+ }
39
+ set_cc_op(s, CC_OP_EFLAGS);
40
+}
41
+#endif
42
+
43
/* convert one instruction. s->base.is_jmp is set if the translation must
44
be stopped. Return the next pc value */
45
static bool disas_insn(DisasContext *s, CPUState *cpu)
46
@@ -XXX,XX +XXX,XX @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
47
if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
48
goto illegal_op;
49
}
50
- gen_lea_modrm(env, s, modrm);
51
- if ((s->prefix & PREFIX_LOCK) &&
52
- (tb_cflags(s->base.tb) & CF_PARALLEL)) {
53
- gen_helper_cmpxchg16b(cpu_env, s->A0);
54
- } else {
55
- gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
56
- }
57
- set_cc_op(s, CC_OP_EFLAGS);
58
+ gen_cmpxchg16b(s, env, modrm);
59
break;
60
}
61
-#endif
62
+#endif
63
if (!(s->cpuid_features & CPUID_CX8)) {
64
goto illegal_op;
65
}
66
- gen_lea_modrm(env, s, modrm);
67
- if ((s->prefix & PREFIX_LOCK) &&
68
- (tb_cflags(s->base.tb) & CF_PARALLEL)) {
69
- gen_helper_cmpxchg8b(cpu_env, s->A0);
70
- } else {
71
- gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
72
- }
73
- set_cc_op(s, CC_OP_EFLAGS);
74
+ gen_cmpxchg8b(s, env, modrm);
75
break;
76
77
case 7: /* RDSEED */
78
--
79
2.34.1
80
81
New patch
1
Use tcg_gen_atomic_cmpxchg_i64 for the atomic case,
2
and tcg_gen_nonatomic_cmpxchg_i64 otherwise.
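The non-atomic expansion has to keep the behaviour of the helper removed
below: the store happens even when the compare fails, and the old memory
value is what feeds the EDX:EAX writeback and the Z flag. A sketch of those
semantics (the function name and direct pointer access are illustrative,
not QEMU API):

    /* Non-atomic CMPXCHG8B semantics (sketch): always store, return old. */
    static uint64_t cmpxchg8b_nonatomic(uint64_t *mem, uint64_t cmp,
                                        uint64_t newv)
    {
        uint64_t old = *mem;
        *mem = (old == cmp ? newv : old);  /* the store always happens */
        return old;  /* Z set iff old == cmp; EDX:EAX updated on mismatch */
    }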
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
target/i386/helper.h | 2 --
9
target/i386/tcg/mem_helper.c | 57 ------------------------------------
10
target/i386/tcg/translate.c | 54 ++++++++++++++++++++++++++++++----
11
3 files changed, 49 insertions(+), 64 deletions(-)
12
13
diff --git a/target/i386/helper.h b/target/i386/helper.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/i386/helper.h
16
+++ b/target/i386/helper.h
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_1(rsm, void, env)
18
#endif /* !CONFIG_USER_ONLY */
19
20
DEF_HELPER_2(into, void, env, int)
21
-DEF_HELPER_2(cmpxchg8b_unlocked, void, env, tl)
22
-DEF_HELPER_2(cmpxchg8b, void, env, tl)
23
#ifdef TARGET_X86_64
24
DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl)
25
DEF_HELPER_2(cmpxchg16b, void, env, tl)
26
diff --git a/target/i386/tcg/mem_helper.c b/target/i386/tcg/mem_helper.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/target/i386/tcg/mem_helper.c
29
+++ b/target/i386/tcg/mem_helper.c
30
@@ -XXX,XX +XXX,XX @@
31
#include "tcg/tcg.h"
32
#include "helper-tcg.h"
33
34
-void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0)
35
-{
36
- uintptr_t ra = GETPC();
37
- uint64_t oldv, cmpv, newv;
38
- int eflags;
39
-
40
- eflags = cpu_cc_compute_all(env, CC_OP);
41
-
42
- cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]);
43
- newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]);
44
-
45
- oldv = cpu_ldq_data_ra(env, a0, ra);
46
- newv = (cmpv == oldv ? newv : oldv);
47
- /* always do the store */
48
- cpu_stq_data_ra(env, a0, newv, ra);
49
-
50
- if (oldv == cmpv) {
51
- eflags |= CC_Z;
52
- } else {
53
- env->regs[R_EAX] = (uint32_t)oldv;
54
- env->regs[R_EDX] = (uint32_t)(oldv >> 32);
55
- eflags &= ~CC_Z;
56
- }
57
- CC_SRC = eflags;
58
-}
59
-
60
-void helper_cmpxchg8b(CPUX86State *env, target_ulong a0)
61
-{
62
-#ifdef CONFIG_ATOMIC64
63
- uint64_t oldv, cmpv, newv;
64
- int eflags;
65
-
66
- eflags = cpu_cc_compute_all(env, CC_OP);
67
-
68
- cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]);
69
- newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]);
70
-
71
- {
72
- uintptr_t ra = GETPC();
73
- int mem_idx = cpu_mmu_index(env, false);
74
- MemOpIdx oi = make_memop_idx(MO_TEUQ, mem_idx);
75
- oldv = cpu_atomic_cmpxchgq_le_mmu(env, a0, cmpv, newv, oi, ra);
76
- }
77
-
78
- if (oldv == cmpv) {
79
- eflags |= CC_Z;
80
- } else {
81
- env->regs[R_EAX] = (uint32_t)oldv;
82
- env->regs[R_EDX] = (uint32_t)(oldv >> 32);
83
- eflags &= ~CC_Z;
84
- }
85
- CC_SRC = eflags;
86
-#else
87
- cpu_loop_exit_atomic(env_cpu(env), GETPC());
88
-#endif /* CONFIG_ATOMIC64 */
89
-}
90
-
91
#ifdef TARGET_X86_64
92
void helper_cmpxchg16b_unlocked(CPUX86State *env, target_ulong a0)
93
{
94
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
95
index XXXXXXX..XXXXXXX 100644
96
--- a/target/i386/tcg/translate.c
97
+++ b/target/i386/tcg/translate.c
98
@@ -XXX,XX +XXX,XX @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
99
100
static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm)
101
{
102
+ TCGv_i64 cmp, val, old;
103
+ TCGv Z;
104
+
105
gen_lea_modrm(env, s, modrm);
106
107
- if ((s->prefix & PREFIX_LOCK) &&
108
- (tb_cflags(s->base.tb) & CF_PARALLEL)) {
109
- gen_helper_cmpxchg8b(cpu_env, s->A0);
110
+ cmp = tcg_temp_new_i64();
111
+ val = tcg_temp_new_i64();
112
+ old = tcg_temp_new_i64();
113
+
114
+ /* Construct the comparison values from the register pair. */
115
+ tcg_gen_concat_tl_i64(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
116
+ tcg_gen_concat_tl_i64(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
117
+
118
+ /* Only require atomic with LOCK; non-parallel handled in generator. */
119
+ if (s->prefix & PREFIX_LOCK) {
120
+ tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ);
121
} else {
122
- gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
123
+ tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val,
124
+ s->mem_index, MO_TEUQ);
125
}
126
- set_cc_op(s, CC_OP_EFLAGS);
127
+ tcg_temp_free_i64(val);
128
+
129
+    /* Compute the required value of Z. */
130
+ tcg_gen_setcond_i64(TCG_COND_EQ, cmp, old, cmp);
131
+ Z = tcg_temp_new();
132
+ tcg_gen_trunc_i64_tl(Z, cmp);
133
+ tcg_temp_free_i64(cmp);
134
+
135
+ /*
136
+ * Extract the result values for the register pair.
137
+ * For 32-bit, we may do this unconditionally, because on success (Z=1),
138
+ * the old value matches the previous value in EDX:EAX. For x86_64,
139
+ * the store must be conditional, because we must leave the source
140
+ * registers unchanged on success, and zero-extend the writeback
141
+ * on failure (Z=0).
142
+ */
143
+ if (TARGET_LONG_BITS == 32) {
144
+ tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], old);
145
+ } else {
146
+ TCGv zero = tcg_constant_tl(0);
147
+
148
+ tcg_gen_extr_i64_tl(s->T0, s->T1, old);
149
+ tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], Z, zero,
150
+ s->T0, cpu_regs[R_EAX]);
151
+ tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EDX], Z, zero,
152
+ s->T1, cpu_regs[R_EDX]);
153
+ }
154
+ tcg_temp_free_i64(old);
155
+
156
+ /* Update Z. */
157
+ gen_compute_eflags(s);
158
+ tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, Z, ctz32(CC_Z), 1);
159
+ tcg_temp_free(Z);
160
}
161
162
#ifdef TARGET_X86_64
163
--
164
2.34.1
165
166
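A note on the flags technique above: rather than calling a helper and setting
CC_OP_EFLAGS, the translator computes Z inline and deposits the one-bit result
at bit position ctz32(CC_Z) of cc_src. What follows is a minimal standalone C
sketch of the semantics being generated, not QEMU code; CC_Z matches the x86
EFLAGS bit, but the function and variable names are illustrative only:

#include <stdint.h>
#include <stdio.h>

#define CC_Z 0x0040   /* Z is bit 6 of EFLAGS, so ctz32(CC_Z) == 6 */

/* Non-atomic CMPXCHG8B semantics; returns the updated eflags. */
static uint32_t cmpxchg8b(uint64_t *mem, uint32_t *eax, uint32_t *edx,
                          uint32_t ebx, uint32_t ecx, uint32_t eflags)
{
    uint64_t cmp = (uint64_t)*eax | ((uint64_t)*edx << 32);
    uint64_t new = (uint64_t)ebx | ((uint64_t)ecx << 32);
    uint64_t old = *mem;
    uint32_t z = (old == cmp);

    *mem = z ? new : old;            /* the store always happens */
    *eax = (uint32_t)old;            /* a no-op when z == 1 ... */
    *edx = (uint32_t)(old >> 32);    /* ... since old == EDX:EAX */

    /* The deposit: clear the Z bit, then insert the 1-bit result. */
    return (eflags & ~CC_Z) | (z << __builtin_ctz(CC_Z));
}

int main(void)
{
    uint64_t mem = 0x1122334455667788ull;
    uint32_t eax = 0x55667788, edx = 0x11223344, eflags = 0;

    eflags = cmpxchg8b(&mem, &eax, &edx, 0xdeadbeef, 0xcafef00d, eflags);
    printf("Z=%d mem=%016llx\n", !!(eflags & CC_Z), (unsigned long long)mem);
    return 0;
}

On success the EDX:EAX writeback is a no-op because the old value already
equals EDX:EAX, which is why the 32-bit translation can extract the result
pair unconditionally.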
New patch
Use tcg_gen_atomic_cmpxchg_i128 for the atomic case,
and tcg_gen_nonatomic_cmpxchg_i128 otherwise.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/i386/helper.h         |  4 ---
 target/i386/tcg/mem_helper.c | 69 ------------------------------------
 target/i386/tcg/translate.c  | 44 ++++++++++++++++++++---
 3 files changed, 39 insertions(+), 78 deletions(-)

diff --git a/target/i386/helper.h b/target/i386/helper.h
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_1(rsm, void, env)
 #endif /* !CONFIG_USER_ONLY */
 
 DEF_HELPER_2(into, void, env, int)
-#ifdef TARGET_X86_64
-DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl)
-DEF_HELPER_2(cmpxchg16b, void, env, tl)
-#endif
 DEF_HELPER_FLAGS_1(single_step, TCG_CALL_NO_WG, noreturn, env)
 DEF_HELPER_1(rechecking_single_step, void, env)
 DEF_HELPER_1(cpuid, void, env)
diff --git a/target/i386/tcg/mem_helper.c b/target/i386/tcg/mem_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/tcg/mem_helper.c
+++ b/target/i386/tcg/mem_helper.c
@@ -XXX,XX +XXX,XX @@
 #include "tcg/tcg.h"
 #include "helper-tcg.h"
 
-#ifdef TARGET_X86_64
-void helper_cmpxchg16b_unlocked(CPUX86State *env, target_ulong a0)
-{
-    uintptr_t ra = GETPC();
-    Int128 oldv, cmpv, newv;
-    uint64_t o0, o1;
-    int eflags;
-    bool success;
-
-    if ((a0 & 0xf) != 0) {
-        raise_exception_ra(env, EXCP0D_GPF, GETPC());
-    }
-    eflags = cpu_cc_compute_all(env, CC_OP);
-
-    cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
-    newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]);
-
-    o0 = cpu_ldq_data_ra(env, a0 + 0, ra);
-    o1 = cpu_ldq_data_ra(env, a0 + 8, ra);
-
-    oldv = int128_make128(o0, o1);
-    success = int128_eq(oldv, cmpv);
-    if (!success) {
-        newv = oldv;
-    }
-
-    cpu_stq_data_ra(env, a0 + 0, int128_getlo(newv), ra);
-    cpu_stq_data_ra(env, a0 + 8, int128_gethi(newv), ra);
-
-    if (success) {
-        eflags |= CC_Z;
-    } else {
-        env->regs[R_EAX] = int128_getlo(oldv);
-        env->regs[R_EDX] = int128_gethi(oldv);
-        eflags &= ~CC_Z;
-    }
-    CC_SRC = eflags;
-}
-
-void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
-{
-    uintptr_t ra = GETPC();
-
-    if ((a0 & 0xf) != 0) {
-        raise_exception_ra(env, EXCP0D_GPF, ra);
-    } else if (HAVE_CMPXCHG128) {
-        int eflags = cpu_cc_compute_all(env, CC_OP);
-
-        Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
-        Int128 newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]);
-
-        int mem_idx = cpu_mmu_index(env, false);
-        MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
-        Int128 oldv = cpu_atomic_cmpxchgo_le_mmu(env, a0, cmpv, newv, oi, ra);
-
-        if (int128_eq(oldv, cmpv)) {
-            eflags |= CC_Z;
-        } else {
-            env->regs[R_EAX] = int128_getlo(oldv);
-            env->regs[R_EDX] = int128_gethi(oldv);
-            eflags &= ~CC_Z;
-        }
-        CC_SRC = eflags;
-    } else {
-        cpu_loop_exit_atomic(env_cpu(env), ra);
-    }
-}
-#endif
-
 void helper_boundw(CPUX86State *env, target_ulong a0, int v)
 {
     int low, high;
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm)
 #ifdef TARGET_X86_64
 static void gen_cmpxchg16b(DisasContext *s, CPUX86State *env, int modrm)
 {
+    MemOp mop = MO_TE | MO_128 | MO_ALIGN;
+    TCGv_i64 t0, t1;
+    TCGv_i128 cmp, val;
+
     gen_lea_modrm(env, s, modrm);
 
-    if ((s->prefix & PREFIX_LOCK) &&
-        (tb_cflags(s->base.tb) & CF_PARALLEL)) {
-        gen_helper_cmpxchg16b(cpu_env, s->A0);
+    cmp = tcg_temp_new_i128();
+    val = tcg_temp_new_i128();
+    tcg_gen_concat_i64_i128(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
+    tcg_gen_concat_i64_i128(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
+
+    /* Only require atomic with LOCK; non-parallel handled in generator. */
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_atomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
     } else {
-        gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
+        tcg_gen_nonatomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
     }
-    set_cc_op(s, CC_OP_EFLAGS);
+
+    tcg_gen_extr_i128_i64(s->T0, s->T1, val);
+    tcg_temp_free_i128(cmp);
+    tcg_temp_free_i128(val);
+
+    /* Determine success after the fact. */
+    t0 = tcg_temp_new_i64();
+    t1 = tcg_temp_new_i64();
+    tcg_gen_xor_i64(t0, s->T0, cpu_regs[R_EAX]);
+    tcg_gen_xor_i64(t1, s->T1, cpu_regs[R_EDX]);
+    tcg_gen_or_i64(t0, t0, t1);
+    tcg_temp_free_i64(t1);
+
+    /* Update Z. */
+    gen_compute_eflags(s);
+    tcg_gen_setcondi_i64(TCG_COND_EQ, t0, t0, 0);
+    tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, t0, ctz32(CC_Z), 1);
+    tcg_temp_free_i64(t0);
+
+    /*
+     * Extract the result values for the register pair. We may do this
+     * unconditionally, because on success (Z=1), the old value matches
+     * the previous value in RDX:RAX.
+     */
+    tcg_gen_mov_i64(cpu_regs[R_EAX], s->T0);
+    tcg_gen_mov_i64(cpu_regs[R_EDX], s->T1);
 }
 #endif
 
--
2.34.1
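A note on "determine success after the fact": the cmpxchg returns the old
memory value in place of val, and on success that old value necessarily equals
RDX:RAX, so Z can be recovered with two XORs, an OR, and a compare against
zero rather than a dedicated 128-bit equality primitive. A standalone C sketch
of just that reduction, not QEMU code and with illustrative names:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Z computation for CMPXCHG16B, given the returned old value (lo, hi)
 * and the original RDX:RAX pair: equal iff the xor-or reduces to 0. */
static bool cmpxchg16b_z(uint64_t old_lo, uint64_t old_hi,
                         uint64_t rax, uint64_t rdx)
{
    uint64_t t0 = old_lo ^ rax;   /* tcg_gen_xor_i64(t0, T0, EAX) */
    uint64_t t1 = old_hi ^ rdx;   /* tcg_gen_xor_i64(t1, T1, EDX) */
    return (t0 | t1) == 0;        /* or_i64 + setcondi against zero */
}

int main(void)
{
    printf("%d\n", cmpxchg16b_z(1, 2, 1, 2));   /* 1: success */
    printf("%d\n", cmpxchg16b_z(1, 2, 1, 3));   /* 0: mismatch */
    return 0;
}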
New patch
'offset' should be bits [23:5] of the LDR instruction, rather than [4:0].

Fixes: d59d83a1c388 ("tcg/aarch64: Reorg goto_tb implementation")
Reviewed-by: Zenghui Yu <yuzenghui@huawei.com>
Reported-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target.c.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
         ptrdiff_t i_offset = i_addr - jmp_rx;
 
         /* Note that we asserted this in range in tcg_out_goto_tb. */
-        insn = deposit32(I3305_LDR | TCG_REG_TMP, 0, 5, i_offset >> 2);
+        insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
     }
     qatomic_set((uint32_t *)jmp_rw, insn);
     flush_idcache_range(jmp_rx, jmp_rw, 4);
--
2.34.1
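For context on why the field is [23:5]: LDR (literal) on AArch64 encodes a
19-bit scaled word offset, imm19, in bits [23:5], while bits [4:0] hold the
destination register Rt, so depositing the offset at bit 0 clobbered Rt and
left imm19 zero. A standalone C sketch of the fix; deposit32 is re-implemented
locally here to mirror QEMU's bitops helper, and the 0x58000000 opcode is the
A64 64-bit LDR literal encoding:

#include <stdint.h>
#include <stdio.h>

static uint32_t deposit32(uint32_t insn, int pos, int len, uint32_t val)
{
    uint32_t mask = (len < 32 ? (1u << len) : 0) - 1;
    return (insn & ~(mask << pos)) | ((val & mask) << pos);
}

int main(void)
{
    uint32_t ldr_lit = 0x58000000; /* LDR Xt, <label>; Rt in bits [4:0] */
    int rt = 30;                   /* an arbitrary destination register */
    int32_t byte_off = 256;        /* PC-relative, must be 4-aligned */

    uint32_t bad  = deposit32(ldr_lit | rt, 0, 5,  byte_off >> 2);
    uint32_t good = deposit32(ldr_lit | rt, 5, 19, byte_off >> 2);

    /* The buggy form overwrites Rt and never sets the offset. */
    printf("bad=%08x good=%08x\n", bad, good);
    return 0;
}

The >> 2 scaling is the same one applied to i_offset in the patch: the
hardware multiplies imm19 by 4 to form the byte offset.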