1
The following changes since commit 2d20a57453f6a206938cbbf77bed0b378c806c1f:
1
The following changes since commit 579510e196a544b42bd8bca9cc61688d4d1211ac:
2
2
3
Merge tag 'pull-fixes-for-7.1-200422-1' of https://github.com/stsquad/qemu into staging (2022-04-20 11:13:08 -0700)
3
Merge tag 'pull-monitor-2023-02-03-v2' of https://repo.or.cz/qemu/armbru into staging (2023-02-04 10:19:55 +0000)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20220420
7
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230204
8
8
9
for you to fetch changes up to a61532faa5a4d5e021e35b6a4a1e180c72d4a22f:
9
for you to fetch changes up to a2495ede07498ee36b18b03e7038ba30c9871bb2:
10
10
11
tcg: Add tcg_constant_ptr (2022-04-20 12:12:47 -0700)
11
tcg/aarch64: Fix patching of LDR in tb_target_set_jmp_target (2023-02-04 06:19:43 -1000)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Cleanup sysemu/tcg.h usage.
14
tcg: Add support for TCGv_i128 in parameters and returns.
15
Fix indirect lowering vs cond branches
15
tcg: Add support for TCGv_i128 in cmpxchg.
16
Remove ATOMIC_MMU_IDX
16
tcg: Test CPUJumpCache in tb_jmp_cache_clear_page
17
Add tcg_constant_ptr
17
tcg: Split out tcg_gen_nonatomic_cmpxchg_i{32,64}
18
tcg/aarch64: Fix patching of LDR in tb_target_set_jmp_target
19
target/arm: Use tcg_gen_atomic_cmpxchg_i128
20
target/i386: Use tcg_gen_atomic_cmpxchg_i128
21
target/i386: Use tcg_gen_nonatomic_cmpxchg_i{32,64}
22
target/s390x: Use tcg_gen_atomic_cmpxchg_i128
23
target/s390x: Use TCGv_i128 in passing and returning float128
24
target/s390x: Implement CC_OP_NZ in gen_op_calc_cc
18
25
19
----------------------------------------------------------------
26
----------------------------------------------------------------
20
Richard Henderson (3):
27
Eric Auger (1):
21
tcg: Fix indirect lowering vs TCG_OPF_COND_BRANCH
28
accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page
22
accel/tcg: Remove ATOMIC_MMU_IDX
23
tcg: Add tcg_constant_ptr
24
29
25
Thomas Huth (1):
30
Ilya Leoshkevich (3):
26
Don't include sysemu/tcg.h if it is not necessary
31
tests/tcg/s390x: Add div.c
32
tests/tcg/s390x: Add clst.c
33
tests/tcg/s390x: Add cdsg.c
27
34
28
include/tcg/tcg.h | 4 ++++
35
Richard Henderson (36):
29
accel/tcg/cputlb.c | 1 -
36
tcg: Init temp_subindex in liveness_pass_2
30
accel/tcg/hmp.c | 1 -
37
tcg: Define TCG_TYPE_I128 and related helper macros
31
accel/tcg/tcg-accel-ops-icount.c | 1 -
38
tcg: Handle dh_typecode_i128 with TCG_CALL_{RET,ARG}_NORMAL
32
accel/tcg/user-exec.c | 1 -
39
tcg: Allocate objects contiguously in temp_allocate_frame
33
bsd-user/main.c | 1 -
40
tcg: Introduce tcg_out_addi_ptr
34
hw/virtio/vhost.c | 1 -
41
tcg: Add TCG_CALL_{RET,ARG}_BY_REF
35
linux-user/main.c | 1 -
42
tcg: Introduce tcg_target_call_oarg_reg
36
monitor/misc.c | 1 -
43
tcg: Add TCG_CALL_RET_BY_VEC
37
target/arm/helper.c | 1 -
44
include/qemu/int128: Use Int128 structure for TCI
38
target/s390x/cpu_models_sysemu.c | 1 -
45
tcg/i386: Add TCG_TARGET_CALL_{RET,ARG}_I128
39
target/s390x/helper.c | 1 -
46
tcg/tci: Fix big-endian return register ordering
40
tcg/tcg.c | 34 +++++++++++++++++++++++++++-------
47
tcg/tci: Add TCG_TARGET_CALL_{RET,ARG}_I128
41
13 files changed, 31 insertions(+), 18 deletions(-)
48
tcg: Add TCG_TARGET_CALL_{RET,ARG}_I128
49
tcg: Add temp allocation for TCGv_i128
50
tcg: Add basic data movement for TCGv_i128
51
tcg: Add guest load/store primitives for TCGv_i128
52
tcg: Add tcg_gen_{non}atomic_cmpxchg_i128
53
tcg: Split out tcg_gen_nonatomic_cmpxchg_i{32,64}
54
target/arm: Use tcg_gen_atomic_cmpxchg_i128 for STXP
55
target/arm: Use tcg_gen_atomic_cmpxchg_i128 for CASP
56
target/ppc: Use tcg_gen_atomic_cmpxchg_i128 for STQCX
57
tests/tcg/s390x: Add long-double.c
58
target/s390x: Use a single return for helper_divs32/u32
59
target/s390x: Use a single return for helper_divs64/u64
60
target/s390x: Use Int128 for return from CLST
61
target/s390x: Use Int128 for return from CKSM
62
target/s390x: Use Int128 for return from TRE
63
target/s390x: Copy wout_x1 to wout_x1_P
64
target/s390x: Use Int128 for returning float128
65
target/s390x: Use Int128 for passing float128
66
target/s390x: Use tcg_gen_atomic_cmpxchg_i128 for CDSG
67
target/s390x: Implement CC_OP_NZ in gen_op_calc_cc
68
target/i386: Split out gen_cmpxchg8b, gen_cmpxchg16b
69
target/i386: Inline cmpxchg8b
70
target/i386: Inline cmpxchg16b
71
tcg/aarch64: Fix patching of LDR in tb_target_set_jmp_target
72
73
accel/tcg/tcg-runtime.h | 11 ++
74
include/exec/cpu_ldst.h | 10 +
75
include/exec/helper-head.h | 7 +
76
include/qemu/atomic128.h | 29 ++-
77
include/qemu/int128.h | 25 ++-
78
include/tcg/tcg-op.h | 15 ++
79
include/tcg/tcg.h | 49 ++++-
80
target/arm/helper-a64.h | 8 -
81
target/i386/helper.h | 6 -
82
target/ppc/helper.h | 2 -
83
target/s390x/helper.h | 54 +++---
84
tcg/aarch64/tcg-target.h | 2 +
85
tcg/arm/tcg-target.h | 2 +
86
tcg/i386/tcg-target.h | 10 +
87
tcg/loongarch64/tcg-target.h | 2 +
88
tcg/mips/tcg-target.h | 2 +
89
tcg/riscv/tcg-target.h | 3 +
90
tcg/s390x/tcg-target.h | 2 +
91
tcg/sparc64/tcg-target.h | 2 +
92
tcg/tcg-internal.h | 17 ++
93
tcg/tci/tcg-target.h | 3 +
94
target/s390x/tcg/insn-data.h.inc | 60 +++---
95
accel/tcg/cputlb.c | 119 +++++++++++-
96
accel/tcg/user-exec.c | 66 +++++++
97
target/arm/helper-a64.c | 147 ---------------
98
target/arm/translate-a64.c | 121 ++++++------
99
target/i386/tcg/mem_helper.c | 126 -------------
100
target/i386/tcg/translate.c | 126 +++++++++++--
101
target/ppc/mem_helper.c | 44 -----
102
target/ppc/translate.c | 102 +++++-----
103
target/s390x/tcg/fpu_helper.c | 103 +++++-----
104
target/s390x/tcg/int_helper.c | 64 +++----
105
target/s390x/tcg/mem_helper.c | 77 +-------
106
target/s390x/tcg/translate.c | 212 ++++++++++++++-------
107
tcg/tcg-op.c | 393 +++++++++++++++++++++++++++++++++------
108
tcg/tcg.c | 308 ++++++++++++++++++++++++++----
109
tcg/tci.c | 65 +++----
110
tests/tcg/s390x/cdsg.c | 93 +++++++++
111
tests/tcg/s390x/clst.c | 82 ++++++++
112
tests/tcg/s390x/div.c | 75 ++++++++
113
tests/tcg/s390x/long-double.c | 24 +++
114
util/int128.c | 42 +++++
115
accel/tcg/atomic_common.c.inc | 45 +++++
116
tcg/aarch64/tcg-target.c.inc | 19 +-
117
tcg/arm/tcg-target.c.inc | 30 ++-
118
tcg/i386/tcg-target.c.inc | 52 +++++-
119
tcg/loongarch64/tcg-target.c.inc | 17 +-
120
tcg/mips/tcg-target.c.inc | 17 +-
121
tcg/ppc/tcg-target.c.inc | 20 +-
122
tcg/riscv/tcg-target.c.inc | 17 +-
123
tcg/s390x/tcg-target.c.inc | 16 +-
124
tcg/sparc64/tcg-target.c.inc | 19 +-
125
tcg/tci/tcg-target.c.inc | 27 ++-
126
tests/tcg/s390x/Makefile.target | 7 +
127
54 files changed, 2040 insertions(+), 956 deletions(-)
128
create mode 100644 tests/tcg/s390x/cdsg.c
129
create mode 100644 tests/tcg/s390x/clst.c
130
create mode 100644 tests/tcg/s390x/div.c
131
create mode 100644 tests/tcg/s390x/long-double.c
diff view generated by jsdifflib
New patch
1
From: Eric Auger <eric.auger@redhat.com>
1
2
3
After commit 4e4fa6c12d ("accel/tcg: Complete cpu initialization
4
before registration"), it looks like the CPUJumpCache pointer can be NULL.
5
This causes a SIGSEGV when running the debug-wp-migration kvm unit test.
6
7
In the first place, it should be clarified why this TCG code is called
8
with KVM acceleration. This may hide another bug.
9
10
Fixes: 4e4fa6c12d ("accel/tcg: Complete cpu initialization before registration")
11
Signed-off-by: Eric Auger <eric.auger@redhat.com>
12
Message-Id: <20230203171510.2867451-1-eric.auger@redhat.com>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
---
15
accel/tcg/cputlb.c | 7 ++++++-
16
1 file changed, 6 insertions(+), 1 deletion(-)
17
18
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/accel/tcg/cputlb.c
21
+++ b/accel/tcg/cputlb.c
22
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
23
24
static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
25
{
26
- int i, i0 = tb_jmp_cache_hash_page(page_addr);
27
CPUJumpCache *jc = cpu->tb_jmp_cache;
28
+ int i, i0;
29
30
+ if (unlikely(!jc)) {
31
+ return;
32
+ }
33
+
34
+ i0 = tb_jmp_cache_hash_page(page_addr);
35
for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
36
qatomic_set(&jc->array[i0 + i].tb, NULL);
37
}
38
--
39
2.34.1
diff view generated by jsdifflib
1
With TCG_OPF_COND_BRANCH, we extended the lifetimes of
1
Correctly handle large types while lowering.
2
globals across extended basic blocks. This means that
3
the liveness computed in pass 1 does not kill globals
4
in the same way as normal temps.
5
2
6
Introduce TEMP_EBB to match this lifetime, so that we
3
Fixes: fac87bd2a49b ("tcg: Add temp_subindex to TCGTemp")
7
get correct register allocation for the temps that we
8
introduce during the indirect lowering pass.
9
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Fixes: b4cb76e6208 ("tcg: Do not kill globals at conditional branches")
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
5
---
14
include/tcg/tcg.h | 2 ++
6
tcg/tcg.c | 1 +
15
tcg/tcg.c | 34 +++++++++++++++++++++++++++-------
7
1 file changed, 1 insertion(+)
16
2 files changed, 29 insertions(+), 7 deletions(-)
17
8
18
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/include/tcg/tcg.h
21
+++ b/include/tcg/tcg.h
22
@@ -XXX,XX +XXX,XX @@ typedef enum TCGTempVal {
23
typedef enum TCGTempKind {
24
/* Temp is dead at the end of all basic blocks. */
25
TEMP_NORMAL,
26
+ /* Temp is live across conditional branch, but dead otherwise. */
27
+ TEMP_EBB,
28
/* Temp is saved across basic blocks but dead at the end of TBs. */
29
TEMP_LOCAL,
30
/* Temp is saved across both basic blocks and translation blocks. */
31
diff --git a/tcg/tcg.c b/tcg/tcg.c
9
diff --git a/tcg/tcg.c b/tcg/tcg.c
32
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
33
--- a/tcg/tcg.c
11
--- a/tcg/tcg.c
34
+++ b/tcg/tcg.c
12
+++ b/tcg/tcg.c
35
@@ -XXX,XX +XXX,XX @@ void tcg_temp_free_internal(TCGTemp *ts)
36
TCGContext *s = tcg_ctx;
37
int k, idx;
38
39
- /* In order to simplify users of tcg_constant_*, silently ignore free. */
40
- if (ts->kind == TEMP_CONST) {
41
+ switch (ts->kind) {
42
+ case TEMP_CONST:
43
+ /*
44
+ * In order to simplify users of tcg_constant_*,
45
+ * silently ignore free.
46
+ */
47
return;
48
+ case TEMP_NORMAL:
49
+ case TEMP_LOCAL:
50
+ break;
51
+ default:
52
+ g_assert_not_reached();
53
}
54
55
#if defined(CONFIG_DEBUG_TCG)
56
@@ -XXX,XX +XXX,XX @@ void tcg_temp_free_internal(TCGTemp *ts)
57
}
58
#endif
59
60
- tcg_debug_assert(ts->kind < TEMP_GLOBAL);
61
tcg_debug_assert(ts->temp_allocated != 0);
62
ts->temp_allocated = 0;
63
64
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_start(TCGContext *s)
65
case TEMP_GLOBAL:
66
break;
67
case TEMP_NORMAL:
68
+ case TEMP_EBB:
69
val = TEMP_VAL_DEAD;
70
/* fall through */
71
case TEMP_LOCAL:
72
@@ -XXX,XX +XXX,XX @@ static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
73
case TEMP_LOCAL:
74
snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
75
break;
76
+ case TEMP_EBB:
77
+ snprintf(buf, buf_size, "ebb%d", idx - s->nb_globals);
78
+ break;
79
case TEMP_NORMAL:
80
snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
81
break;
82
@@ -XXX,XX +XXX,XX @@ static void la_bb_end(TCGContext *s, int ng, int nt)
83
state = TS_DEAD | TS_MEM;
84
break;
85
case TEMP_NORMAL:
86
+ case TEMP_EBB:
87
case TEMP_CONST:
88
state = TS_DEAD;
89
break;
90
@@ -XXX,XX +XXX,XX @@ static void la_global_sync(TCGContext *s, int ng)
91
}
92
93
/*
94
- * liveness analysis: conditional branch: all temps are dead,
95
- * globals and local temps should be synced.
96
+ * liveness analysis: conditional branch: all temps are dead unless
97
+ * explicitly live-across-conditional-branch, globals and local temps
98
+ * should be synced.
99
*/
100
static void la_bb_sync(TCGContext *s, int ng, int nt)
101
{
102
@@ -XXX,XX +XXX,XX @@ static void la_bb_sync(TCGContext *s, int ng, int nt)
103
case TEMP_NORMAL:
104
s->temps[i].state = TS_DEAD;
105
break;
106
+ case TEMP_EBB:
107
case TEMP_CONST:
108
continue;
109
default:
110
@@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s)
13
@@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s)
111
TCGTemp *dts = tcg_temp_alloc(s);
14
TCGTemp *dts = tcg_temp_alloc(s);
112
dts->type = its->type;
15
dts->type = its->type;
113
dts->base_type = its->base_type;
16
dts->base_type = its->base_type;
114
+ dts->kind = TEMP_EBB;
17
+ dts->temp_subindex = its->temp_subindex;
18
dts->kind = TEMP_EBB;
115
its->state_ptr = dts;
19
its->state_ptr = dts;
116
} else {
20
} else {
117
its->state_ptr = NULL;
118
@@ -XXX,XX +XXX,XX @@ static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
119
new_type = TEMP_VAL_MEM;
120
break;
121
case TEMP_NORMAL:
122
+ case TEMP_EBB:
123
new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
124
break;
125
case TEMP_CONST:
126
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
127
temp_save(s, ts, allocated_regs);
128
break;
129
case TEMP_NORMAL:
130
+ case TEMP_EBB:
131
/* The liveness analysis already ensures that temps are dead.
132
Keep an tcg_debug_assert for safety. */
133
tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
134
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
135
}
136
137
/*
138
- * At a conditional branch, we assume all temporaries are dead and
139
- * all globals and local temps are synced to their location.
140
+ * At a conditional branch, we assume all temporaries are dead unless
141
+ * explicitly live-across-conditional-branch; all globals and local
142
+ * temps are synced to their location.
143
*/
144
static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
145
{
146
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
147
case TEMP_NORMAL:
148
tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
149
break;
150
+ case TEMP_EBB:
151
case TEMP_CONST:
152
break;
153
default:
154
--
21
--
155
2.34.1
22
2.34.1
diff view generated by jsdifflib
New patch
1
Begin staging in support for TCGv_i128 with Int128.
2
Define the type enumerator, the typedef, and the
3
helper-head.h macros.
1
4
5
This cannot yet be used, because you can't allocate
6
temporaries of this new type.
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
include/exec/helper-head.h | 7 +++++++
13
include/tcg/tcg.h | 17 ++++++++++-------
14
2 files changed, 17 insertions(+), 7 deletions(-)
15
16
diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/include/exec/helper-head.h
19
+++ b/include/exec/helper-head.h
20
@@ -XXX,XX +XXX,XX @@
21
#define dh_alias_int i32
22
#define dh_alias_i64 i64
23
#define dh_alias_s64 i64
24
+#define dh_alias_i128 i128
25
#define dh_alias_f16 i32
26
#define dh_alias_f32 i32
27
#define dh_alias_f64 i64
28
@@ -XXX,XX +XXX,XX @@
29
#define dh_ctype_int int
30
#define dh_ctype_i64 uint64_t
31
#define dh_ctype_s64 int64_t
32
+#define dh_ctype_i128 Int128
33
#define dh_ctype_f16 uint32_t
34
#define dh_ctype_f32 float32
35
#define dh_ctype_f64 float64
36
@@ -XXX,XX +XXX,XX @@
37
#define dh_retvar_decl0_noreturn void
38
#define dh_retvar_decl0_i32 TCGv_i32 retval
39
#define dh_retvar_decl0_i64 TCGv_i64 retval
40
+#define dh_retvar_decl0_i128 TCGv_i128 retval
41
#define dh_retvar_decl0_ptr TCGv_ptr retval
42
#define dh_retvar_decl0(t) glue(dh_retvar_decl0_, dh_alias(t))
43
44
@@ -XXX,XX +XXX,XX @@
45
#define dh_retvar_decl_noreturn
46
#define dh_retvar_decl_i32 TCGv_i32 retval,
47
#define dh_retvar_decl_i64 TCGv_i64 retval,
48
+#define dh_retvar_decl_i128 TCGv_i128 retval,
49
#define dh_retvar_decl_ptr TCGv_ptr retval,
50
#define dh_retvar_decl(t) glue(dh_retvar_decl_, dh_alias(t))
51
52
@@ -XXX,XX +XXX,XX @@
53
#define dh_retvar_noreturn NULL
54
#define dh_retvar_i32 tcgv_i32_temp(retval)
55
#define dh_retvar_i64 tcgv_i64_temp(retval)
56
+#define dh_retvar_i128 tcgv_i128_temp(retval)
57
#define dh_retvar_ptr tcgv_ptr_temp(retval)
58
#define dh_retvar(t) glue(dh_retvar_, dh_alias(t))
59
60
@@ -XXX,XX +XXX,XX @@
61
#define dh_typecode_i64 4
62
#define dh_typecode_s64 5
63
#define dh_typecode_ptr 6
64
+#define dh_typecode_i128 7
65
#define dh_typecode_int dh_typecode_s32
66
#define dh_typecode_f16 dh_typecode_i32
67
#define dh_typecode_f32 dh_typecode_i32
68
@@ -XXX,XX +XXX,XX @@
69
70
#define dh_callflag_i32 0
71
#define dh_callflag_i64 0
72
+#define dh_callflag_i128 0
73
#define dh_callflag_ptr 0
74
#define dh_callflag_void 0
75
#define dh_callflag_noreturn TCG_CALL_NO_RETURN
76
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
77
index XXXXXXX..XXXXXXX 100644
78
--- a/include/tcg/tcg.h
79
+++ b/include/tcg/tcg.h
80
@@ -XXX,XX +XXX,XX @@ typedef struct TCGPool {
81
typedef enum TCGType {
82
TCG_TYPE_I32,
83
TCG_TYPE_I64,
84
+ TCG_TYPE_I128,
85
86
TCG_TYPE_V64,
87
TCG_TYPE_V128,
88
@@ -XXX,XX +XXX,XX @@ typedef tcg_target_ulong TCGArg;
89
in tcg/README. Target CPU front-end code uses these types to deal
90
with TCG variables as it emits TCG code via the tcg_gen_* functions.
91
They come in several flavours:
92
- * TCGv_i32 : 32 bit integer type
93
- * TCGv_i64 : 64 bit integer type
94
- * TCGv_ptr : a host pointer type
95
- * TCGv_vec : a host vector type; the exact size is not exposed
96
- to the CPU front-end code.
97
- * TCGv : an integer type the same size as target_ulong
98
- (an alias for either TCGv_i32 or TCGv_i64)
99
+ * TCGv_i32 : 32 bit integer type
100
+ * TCGv_i64 : 64 bit integer type
101
+ * TCGv_i128 : 128 bit integer type
102
+ * TCGv_ptr : a host pointer type
103
+ * TCGv_vec : a host vector type; the exact size is not exposed
104
+ to the CPU front-end code.
105
+ * TCGv : an integer type the same size as target_ulong
106
+ (an alias for either TCGv_i32 or TCGv_i64)
107
The compiler's type checking will complain if you mix them
108
up and pass the wrong sized TCGv to a function.
109
110
@@ -XXX,XX +XXX,XX @@ typedef tcg_target_ulong TCGArg;
111
112
typedef struct TCGv_i32_d *TCGv_i32;
113
typedef struct TCGv_i64_d *TCGv_i64;
114
+typedef struct TCGv_i128_d *TCGv_i128;
115
typedef struct TCGv_ptr_d *TCGv_ptr;
116
typedef struct TCGv_vec_d *TCGv_vec;
117
typedef TCGv_ptr TCGv_env;
118
--
119
2.34.1
120
121
diff view generated by jsdifflib
New patch
1
Many hosts pass and return 128-bit quantities like sequential
2
64-bit quantities. Treat this just like we currently break
3
down 64-bit quantities for a 32-bit host.
1
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/tcg.c | 37 +++++++++++++++++++++++++++++++++----
9
1 file changed, 33 insertions(+), 4 deletions(-)
10
11
diff --git a/tcg/tcg.c b/tcg/tcg.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/tcg.c
14
+++ b/tcg/tcg.c
15
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
16
case dh_typecode_s64:
17
info->nr_out = 64 / TCG_TARGET_REG_BITS;
18
info->out_kind = TCG_CALL_RET_NORMAL;
19
+ assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs));
20
+ break;
21
+ case dh_typecode_i128:
22
+ info->nr_out = 128 / TCG_TARGET_REG_BITS;
23
+ info->out_kind = TCG_CALL_RET_NORMAL; /* TODO */
24
+ switch (/* TODO */ TCG_CALL_RET_NORMAL) {
25
+ case TCG_CALL_RET_NORMAL:
26
+ assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs));
27
+ break;
28
+ default:
29
+ qemu_build_not_reached();
30
+ }
31
break;
32
default:
33
g_assert_not_reached();
34
}
35
- assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs));
36
37
/*
38
* Parse and place function arguments.
39
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
40
case dh_typecode_ptr:
41
type = TCG_TYPE_PTR;
42
break;
43
+ case dh_typecode_i128:
44
+ type = TCG_TYPE_I128;
45
+ break;
46
default:
47
g_assert_not_reached();
48
}
49
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
50
}
51
break;
52
53
+ case TCG_TYPE_I128:
54
+ switch (/* TODO */ TCG_CALL_ARG_NORMAL) {
55
+ case TCG_CALL_ARG_EVEN:
56
+ layout_arg_even(&cum);
57
+ /* fall through */
58
+ case TCG_CALL_ARG_NORMAL:
59
+ layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
60
+ break;
61
+ default:
62
+ qemu_build_not_reached();
63
+ }
64
+ break;
65
+
66
default:
67
g_assert_not_reached();
68
}
69
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
70
op->args[pi++] = temp_arg(ret);
71
break;
72
case 2:
73
+ case 4:
74
tcg_debug_assert(ret != NULL);
75
- tcg_debug_assert(ret->base_type == ret->type + 1);
76
+ tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
77
tcg_debug_assert(ret->temp_subindex == 0);
78
- op->args[pi++] = temp_arg(ret);
79
- op->args[pi++] = temp_arg(ret + 1);
80
+ for (i = 0; i < n; ++i) {
81
+ op->args[pi++] = temp_arg(ret + i);
82
+ }
83
break;
84
default:
85
g_assert_not_reached();
86
--
87
2.34.1
88
89
diff view generated by jsdifflib
New patch
1
When allocating a temp to the stack frame, consider the
2
base type and allocate all parts at once.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/tcg.c | 34 ++++++++++++++++++++++++++--------
8
1 file changed, 26 insertions(+), 8 deletions(-)
9
10
diff --git a/tcg/tcg.c b/tcg/tcg.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/tcg.c
13
+++ b/tcg/tcg.c
14
@@ -XXX,XX +XXX,XX @@ static bool liveness_pass_2(TCGContext *s)
15
16
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
17
{
18
- int size = tcg_type_size(ts->type);
19
- int align;
20
intptr_t off;
21
+ int size, align;
22
23
- switch (ts->type) {
24
+ /* When allocating an object, look at the full type. */
25
+ size = tcg_type_size(ts->base_type);
26
+ switch (ts->base_type) {
27
case TCG_TYPE_I32:
28
align = 4;
29
break;
30
@@ -XXX,XX +XXX,XX @@ static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
31
tcg_raise_tb_overflow(s);
32
}
33
s->current_frame_offset = off + size;
34
-
35
- ts->mem_offset = off;
36
#if defined(__sparc__)
37
- ts->mem_offset += TCG_TARGET_STACK_BIAS;
38
+ off += TCG_TARGET_STACK_BIAS;
39
#endif
40
- ts->mem_base = s->frame_temp;
41
- ts->mem_allocated = 1;
42
+
43
+ /* If the object was subdivided, assign memory to all the parts. */
44
+ if (ts->base_type != ts->type) {
45
+ int part_size = tcg_type_size(ts->type);
46
+ int part_count = size / part_size;
47
+
48
+ /*
49
+ * Each part is allocated sequentially in tcg_temp_new_internal.
50
+ * Jump back to the first part by subtracting the current index.
51
+ */
52
+ ts -= ts->temp_subindex;
53
+ for (int i = 0; i < part_count; ++i) {
54
+ ts[i].mem_offset = off + i * part_size;
55
+ ts[i].mem_base = s->frame_temp;
56
+ ts[i].mem_allocated = 1;
57
+ }
58
+ } else {
59
+ ts->mem_offset = off;
60
+ ts->mem_base = s->frame_temp;
61
+ ts->mem_allocated = 1;
62
+ }
63
}
64
65
/* Assign @reg to @ts, and update reg_to_temp[]. */
66
--
67
2.34.1
68
69
diff view generated by jsdifflib
New patch
1
1
Implement the function for arm, i386, and s390x, which will use it.
2
Add stubs for all other backends.
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/tcg.c | 2 ++
9
tcg/aarch64/tcg-target.c.inc | 7 +++++++
10
tcg/arm/tcg-target.c.inc | 20 ++++++++++++++++++++
11
tcg/i386/tcg-target.c.inc | 8 ++++++++
12
tcg/loongarch64/tcg-target.c.inc | 7 +++++++
13
tcg/mips/tcg-target.c.inc | 7 +++++++
14
tcg/ppc/tcg-target.c.inc | 7 +++++++
15
tcg/riscv/tcg-target.c.inc | 7 +++++++
16
tcg/s390x/tcg-target.c.inc | 7 +++++++
17
tcg/sparc64/tcg-target.c.inc | 7 +++++++
18
tcg/tci/tcg-target.c.inc | 7 +++++++
19
11 files changed, 86 insertions(+)
20
21
diff --git a/tcg/tcg.c b/tcg/tcg.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/tcg/tcg.c
24
+++ b/tcg/tcg.c
25
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
26
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
27
static void tcg_out_movi(TCGContext *s, TCGType type,
28
TCGReg ret, tcg_target_long arg);
29
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long)
30
+ __attribute__((unused));
31
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
32
static void tcg_out_goto_tb(TCGContext *s, int which);
33
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
34
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
35
index XXXXXXX..XXXXXXX 100644
36
--- a/tcg/aarch64/tcg-target.c.inc
37
+++ b/tcg/aarch64/tcg-target.c.inc
38
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
39
tcg_out_insn(s, 3305, LDR, 0, rd);
40
}
41
42
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
43
+ tcg_target_long imm)
44
+{
45
+ /* This function is only used for passing structs by reference. */
46
+ g_assert_not_reached();
47
+}
48
+
49
/* Define something more legible for general use. */
50
#define tcg_out_ldst_r tcg_out_insn_3310
51
52
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
53
index XXXXXXX..XXXXXXX 100644
54
--- a/tcg/arm/tcg-target.c.inc
55
+++ b/tcg/arm/tcg-target.c.inc
56
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type,
57
tcg_out_movi32(s, COND_AL, ret, arg);
58
}
59
60
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
61
+ tcg_target_long imm)
62
+{
63
+ int enc, opc = ARITH_ADD;
64
+
65
+ /* All of the easiest immediates to encode are positive. */
66
+ if (imm < 0) {
67
+ imm = -imm;
68
+ opc = ARITH_SUB;
69
+ }
70
+ enc = encode_imm(imm);
71
+ if (enc >= 0) {
72
+ tcg_out_dat_imm(s, COND_AL, opc, rd, rs, enc);
73
+ } else {
74
+ tcg_out_movi32(s, COND_AL, TCG_REG_TMP, imm);
75
+ tcg_out_dat_reg(s, COND_AL, opc, rd, rs,
76
+ TCG_REG_TMP, SHIFT_IMM_LSL(0));
77
+ }
78
+}
79
+
80
/* Type is always V128, with I64 elements. */
81
static void tcg_out_dup2_vec(TCGContext *s, TCGReg rd, TCGReg rl, TCGReg rh)
82
{
83
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
84
index XXXXXXX..XXXXXXX 100644
85
--- a/tcg/i386/tcg-target.c.inc
86
+++ b/tcg/i386/tcg-target.c.inc
87
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type,
88
}
89
}
90
91
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
92
+ tcg_target_long imm)
93
+{
94
+ /* This function is only used for passing structs by reference. */
95
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
96
+ tcg_out_modrm_offset(s, OPC_LEA, rd, rs, imm);
97
+}
98
+
99
static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
100
{
101
if (val == (int8_t)val) {
102
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
103
index XXXXXXX..XXXXXXX 100644
104
--- a/tcg/loongarch64/tcg-target.c.inc
105
+++ b/tcg/loongarch64/tcg-target.c.inc
106
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addi(TCGContext *s, TCGType type, TCGReg rd,
107
}
108
}
109
110
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
111
+ tcg_target_long imm)
112
+{
113
+ /* This function is only used for passing structs by reference. */
114
+ g_assert_not_reached();
115
+}
116
+
117
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg)
118
{
119
tcg_out_opc_andi(s, ret, arg, 0xff);
120
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
121
index XXXXXXX..XXXXXXX 100644
122
--- a/tcg/mips/tcg-target.c.inc
123
+++ b/tcg/mips/tcg-target.c.inc
124
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type,
125
}
126
}
127
128
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
129
+ tcg_target_long imm)
130
+{
131
+ /* This function is only used for passing structs by reference. */
132
+ g_assert_not_reached();
133
+}
134
+
135
static void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg, int flags)
136
{
137
/* ret and arg can't be register tmp0 */
138
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
139
index XXXXXXX..XXXXXXX 100644
140
--- a/tcg/ppc/tcg-target.c.inc
141
+++ b/tcg/ppc/tcg-target.c.inc
142
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
143
}
144
}
145
146
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
147
+ tcg_target_long imm)
148
+{
149
+ /* This function is only used for passing structs by reference. */
150
+ g_assert_not_reached();
151
+}
152
+
153
static bool mask_operand(uint32_t c, int *mb, int *me)
154
{
155
uint32_t lsb, test;
156
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
157
index XXXXXXX..XXXXXXX 100644
158
--- a/tcg/riscv/tcg-target.c.inc
159
+++ b/tcg/riscv/tcg-target.c.inc
160
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
161
tcg_out_opc_imm(s, OPC_LD, rd, rd, 0);
162
}
163
164
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
165
+ tcg_target_long imm)
166
+{
167
+ /* This function is only used for passing structs by reference. */
168
+ g_assert_not_reached();
169
+}
170
+
171
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg)
172
{
173
tcg_out_opc_imm(s, OPC_ANDI, ret, arg, 0xff);
174
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
175
index XXXXXXX..XXXXXXX 100644
176
--- a/tcg/s390x/tcg-target.c.inc
177
+++ b/tcg/s390x/tcg-target.c.inc
178
@@ -XXX,XX +XXX,XX @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
179
return false;
180
}
181
182
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
183
+ tcg_target_long imm)
184
+{
185
+ /* This function is only used for passing structs by reference. */
186
+ tcg_out_mem(s, RX_LA, RXY_LAY, rd, rs, TCG_REG_NONE, imm);
187
+}
188
+
189
static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
190
int msb, int lsb, int ofs, int z)
191
{
192
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
193
index XXXXXXX..XXXXXXX 100644
194
--- a/tcg/sparc64/tcg-target.c.inc
195
+++ b/tcg/sparc64/tcg-target.c.inc
196
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type,
197
tcg_out_movi_int(s, type, ret, arg, false, TCG_REG_T2);
198
}
199
200
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
201
+ tcg_target_long imm)
202
+{
203
+ /* This function is only used for passing structs by reference. */
204
+ g_assert_not_reached();
205
+}
206
+
207
static void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1,
208
TCGReg a2, int op)
209
{
210
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
211
index XXXXXXX..XXXXXXX 100644
212
--- a/tcg/tci/tcg-target.c.inc
213
+++ b/tcg/tci/tcg-target.c.inc
214
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type,
215
}
216
}
217
218
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
219
+ tcg_target_long imm)
220
+{
221
+ /* This function is only used for passing structs by reference. */
222
+ g_assert_not_reached();
223
+}
224
+
225
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func,
226
const TCGHelperInfo *info)
227
{
228
--
229
2.34.1
230
231
diff view generated by jsdifflib
New patch
1
1
These will be used by some hosts, both 32 and 64-bit, to pass and
2
return i128. Not yet used, because allocation is not yet enabled.
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/tcg-internal.h | 3 +
8
tcg/tcg.c | 135 ++++++++++++++++++++++++++++++++++++++++++++-
9
2 files changed, 135 insertions(+), 3 deletions(-)
10
11
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/tcg-internal.h
14
+++ b/tcg/tcg-internal.h
15
@@ -XXX,XX +XXX,XX @@
16
*/
17
typedef enum {
18
TCG_CALL_RET_NORMAL, /* by registers */
19
+ TCG_CALL_RET_BY_REF, /* for i128, by reference */
20
} TCGCallReturnKind;
21
22
typedef enum {
23
@@ -XXX,XX +XXX,XX @@ typedef enum {
24
TCG_CALL_ARG_EXTEND, /* for i32, as a sign/zero-extended i64 */
25
TCG_CALL_ARG_EXTEND_U, /* ... as a zero-extended i64 */
26
TCG_CALL_ARG_EXTEND_S, /* ... as a sign-extended i64 */
27
+ TCG_CALL_ARG_BY_REF, /* for i128, by reference, first */
28
+ TCG_CALL_ARG_BY_REF_N, /* ... by reference, subsequent */
29
} TCGCallArgumentKind;
30
31
typedef struct TCGCallArgumentLoc {
32
diff --git a/tcg/tcg.c b/tcg/tcg.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tcg/tcg.c
35
+++ b/tcg/tcg.c
36
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
37
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
38
static void tcg_out_movi(TCGContext *s, TCGType type,
39
TCGReg ret, tcg_target_long arg);
40
-static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long)
41
- __attribute__((unused));
42
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
43
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
44
static void tcg_out_goto_tb(TCGContext *s, int which);
45
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
46
@@ -XXX,XX +XXX,XX @@ static void layout_arg_normal_n(TCGCumulativeArgs *cum,
47
cum->arg_slot += n;
48
}
49
50
+static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
51
+{
52
+ TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
53
+ int n = 128 / TCG_TARGET_REG_BITS;
54
+
55
+ /* The first subindex carries the pointer. */
56
+ layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
57
+
58
+ /*
59
+ * The callee is allowed to clobber memory associated with
60
+ * structures passed by reference. Therefore we must make copies.
61
+ * Allocate space from "ref_slot", which will be adjusted to
62
+ * follow the parameters on the stack.
63
+ */
64
+ loc[0].ref_slot = cum->ref_slot;
65
+
66
+ /*
67
+ * Subsequent words also go into the reference slot, but
68
+ * do not accumulate into the regular arguments.
69
+ */
70
+ for (int i = 1; i < n; ++i) {
71
+ loc[i] = (TCGCallArgumentLoc){
72
+ .kind = TCG_CALL_ARG_BY_REF_N,
73
+ .arg_idx = cum->arg_idx,
74
+ .tmp_subindex = i,
75
+ .ref_slot = cum->ref_slot + i,
76
+ };
77
+ }
78
+ cum->info_in_idx += n;
79
+ cum->ref_slot += n;
80
+}
81
+
82
static void init_call_layout(TCGHelperInfo *info)
83
{
84
int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
85
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
86
case TCG_CALL_RET_NORMAL:
87
assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs));
88
break;
89
+ case TCG_CALL_RET_BY_REF:
90
+ /*
91
+ * Allocate the first argument to the output.
92
+ * We don't need to store this anywhere, just make it
93
+ * unavailable for use in the input loop below.
94
+ */
95
+ cum.arg_slot = 1;
96
+ break;
97
default:
98
qemu_build_not_reached();
99
}
100
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
101
case TCG_CALL_ARG_NORMAL:
102
layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
103
break;
104
+ case TCG_CALL_ARG_BY_REF:
105
+ layout_arg_by_ref(&cum, info);
106
+ break;
107
default:
108
qemu_build_not_reached();
109
}
110
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
111
assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
112
/* Validate the backend has enough argument space. */
113
assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
114
- assert(cum.ref_slot <= max_stk_slots);
115
+
116
+ /*
117
+ * Relocate the "ref_slot" area to the end of the parameters.
118
+ * Minimizing this stack offset helps code size for x86,
119
+ * which has a signed 8-bit offset encoding.
120
+ */
121
+ if (cum.ref_slot != 0) {
122
+ int ref_base = 0;
123
+
124
+ if (cum.arg_slot > max_reg_slots) {
125
+ int align = __alignof(Int128) / sizeof(tcg_target_long);
126
+
127
+ ref_base = cum.arg_slot - max_reg_slots;
128
+ if (align > 1) {
129
+ ref_base = ROUND_UP(ref_base, align);
130
+ }
131
+ }
132
+ assert(ref_base + cum.ref_slot <= max_stk_slots);
133
+
134
+ if (ref_base != 0) {
135
+ for (int i = cum.info_in_idx - 1; i >= 0; --i) {
136
+ TCGCallArgumentLoc *loc = &info->in[i];
137
+ switch (loc->kind) {
138
+ case TCG_CALL_ARG_BY_REF:
139
+ case TCG_CALL_ARG_BY_REF_N:
140
+ loc->ref_slot += ref_base;
141
+ break;
142
+ default:
143
+ break;
144
+ }
145
+ }
146
+ }
147
+ }
148
}
149
150
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
151
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
152
153
switch (loc->kind) {
154
case TCG_CALL_ARG_NORMAL:
155
+ case TCG_CALL_ARG_BY_REF:
156
+ case TCG_CALL_ARG_BY_REF_N:
157
op->args[pi++] = temp_arg(ts);
158
break;
159
160
@@ -XXX,XX +XXX,XX @@ static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
161
}
162
}
163
164
+static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base,
165
+ intptr_t ref_off, TCGRegSet *allocated_regs)
166
+{
167
+ TCGReg reg;
168
+ int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
169
+
170
+ if (stk_slot < 0) {
171
+ reg = tcg_target_call_iarg_regs[arg_slot];
172
+ tcg_reg_free(s, reg, *allocated_regs);
173
+ tcg_out_addi_ptr(s, reg, ref_base, ref_off);
174
+ tcg_regset_set_reg(*allocated_regs, reg);
175
+ } else {
176
+ reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
177
+ *allocated_regs, 0, false);
178
+ tcg_out_addi_ptr(s, reg, ref_base, ref_off);
179
+ tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
180
+ TCG_TARGET_CALL_STACK_OFFSET
181
+ + stk_slot * sizeof(tcg_target_long));
182
+ }
183
+}
184
+
185
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
186
{
187
const int nb_oargs = TCGOP_CALLO(op);
188
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
189
case TCG_CALL_ARG_EXTEND_S:
190
load_arg_normal(s, loc, ts, &allocated_regs);
191
break;
192
+ case TCG_CALL_ARG_BY_REF:
193
+ load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
194
+ load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
195
+ TCG_TARGET_CALL_STACK_OFFSET
196
+ + loc->ref_slot * sizeof(tcg_target_long),
197
+ &allocated_regs);
198
+ break;
199
+ case TCG_CALL_ARG_BY_REF_N:
200
+ load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
201
+ break;
202
default:
203
g_assert_not_reached();
204
}
205
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
206
save_globals(s, allocated_regs);
207
}
208
209
+ /*
210
+ * If the ABI passes a pointer to the returned struct as the first
211
+ * argument, load that now. Pass a pointer to the output home slot.
212
+ */
213
+ if (info->out_kind == TCG_CALL_RET_BY_REF) {
214
+ TCGTemp *ts = arg_temp(op->args[0]);
215
+
216
+ if (!ts->mem_allocated) {
217
+ temp_allocate_frame(s, ts);
218
+ }
219
+ load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
220
+ }
221
+
222
tcg_out_call(s, tcg_call_func(op), info);
223
224
/* Assign output registers and emit moves if needed. */
225
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
226
ts->mem_coherent = 0;
227
}
228
break;
229
+
230
+ case TCG_CALL_RET_BY_REF:
231
+ /* The callee has performed a write through the reference. */
232
+ for (i = 0; i < nb_oargs; i++) {
233
+ TCGTemp *ts = arg_temp(op->args[i]);
234
+ ts->val_type = TEMP_VAL_MEM;
235
+ }
236
+ break;
237
+
238
default:
239
g_assert_not_reached();
240
}
241
--
242
2.34.1
243
244
diff view generated by jsdifflib
New patch
1
1
Replace the flat array tcg_target_call_oarg_regs[] with
2
a function call including the TCGCallReturnKind.
3
4
Extend the set of registers for ARM to r0-r3 to match the ABI:
5
https://github.com/ARM-software/abi-aa/blob/main/aapcs32/aapcs32.rst#result-return
6
7
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
tcg/tcg.c | 9 ++++++---
12
tcg/aarch64/tcg-target.c.inc | 10 +++++++---
13
tcg/arm/tcg-target.c.inc | 10 +++++++---
14
tcg/i386/tcg-target.c.inc | 16 ++++++++++------
15
tcg/loongarch64/tcg-target.c.inc | 10 ++++++----
16
tcg/mips/tcg-target.c.inc | 10 ++++++----
17
tcg/ppc/tcg-target.c.inc | 10 ++++++----
18
tcg/riscv/tcg-target.c.inc | 10 ++++++----
19
tcg/s390x/tcg-target.c.inc | 9 ++++++---
20
tcg/sparc64/tcg-target.c.inc | 12 ++++++------
21
tcg/tci/tcg-target.c.inc | 12 ++++++------
22
11 files changed, 72 insertions(+), 46 deletions(-)
23
24
diff --git a/tcg/tcg.c b/tcg/tcg.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/tcg/tcg.c
27
+++ b/tcg/tcg.c
28
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
29
TCGReg base, intptr_t ofs);
30
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
31
const TCGHelperInfo *info);
32
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
33
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
34
#ifdef TCG_TARGET_NEED_LDST_LABELS
35
static int tcg_out_ldst_finalize(TCGContext *s);
36
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
37
case dh_typecode_s64:
38
info->nr_out = 64 / TCG_TARGET_REG_BITS;
39
info->out_kind = TCG_CALL_RET_NORMAL;
40
- assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs));
41
+ /* Query the last register now to trigger any assert early. */
42
+ tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
43
break;
44
case dh_typecode_i128:
45
info->nr_out = 128 / TCG_TARGET_REG_BITS;
46
info->out_kind = TCG_CALL_RET_NORMAL; /* TODO */
47
switch (/* TODO */ TCG_CALL_RET_NORMAL) {
48
case TCG_CALL_RET_NORMAL:
49
- assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs));
50
+ /* Query the last register now to trigger any assert early. */
51
+ tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
52
break;
53
case TCG_CALL_RET_BY_REF:
54
/*
55
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
56
case TCG_CALL_RET_NORMAL:
57
for (i = 0; i < nb_oargs; i++) {
58
TCGTemp *ts = arg_temp(op->args[i]);
59
- TCGReg reg = tcg_target_call_oarg_regs[i];
60
+ TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
61
62
/* ENV should not be modified. */
63
tcg_debug_assert(!temp_readonly(ts));
64
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
65
index XXXXXXX..XXXXXXX 100644
66
--- a/tcg/aarch64/tcg-target.c.inc
67
+++ b/tcg/aarch64/tcg-target.c.inc
68
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_iarg_regs[8] = {
69
TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
70
TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
71
};
72
-static const int tcg_target_call_oarg_regs[1] = {
73
- TCG_REG_X0
74
-};
75
+
76
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
77
+{
78
+ tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
79
+ tcg_debug_assert(slot >= 0 && slot <= 1);
80
+ return TCG_REG_X0 + slot;
81
+}
82
83
#define TCG_REG_TMP TCG_REG_X30
84
#define TCG_VEC_TMP TCG_REG_V31
85
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
86
index XXXXXXX..XXXXXXX 100644
87
--- a/tcg/arm/tcg-target.c.inc
88
+++ b/tcg/arm/tcg-target.c.inc
89
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
90
static const int tcg_target_call_iarg_regs[4] = {
91
TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
92
};
93
-static const int tcg_target_call_oarg_regs[2] = {
94
- TCG_REG_R0, TCG_REG_R1
95
-};
96
+
97
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
98
+{
99
+ tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
100
+ tcg_debug_assert(slot >= 0 && slot <= 3);
101
+ return TCG_REG_R0 + slot;
102
+}
103
104
#define TCG_REG_TMP TCG_REG_R12
105
#define TCG_VEC_TMP TCG_REG_Q15
106
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
107
index XXXXXXX..XXXXXXX 100644
108
--- a/tcg/i386/tcg-target.c.inc
109
+++ b/tcg/i386/tcg-target.c.inc
110
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_iarg_regs[] = {
111
#endif
112
};
113
114
-static const int tcg_target_call_oarg_regs[] = {
115
- TCG_REG_EAX,
116
-#if TCG_TARGET_REG_BITS == 32
117
- TCG_REG_EDX
118
-#endif
119
-};
120
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
121
+{
122
+ switch (kind) {
123
+ case TCG_CALL_RET_NORMAL:
124
+ tcg_debug_assert(slot >= 0 && slot <= 1);
125
+ return slot ? TCG_REG_EDX : TCG_REG_EAX;
126
+ default:
127
+ g_assert_not_reached();
128
+ }
129
+}
130
131
/* Constants we accept. */
132
#define TCG_CT_CONST_S32 0x100
133
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
134
index XXXXXXX..XXXXXXX 100644
135
--- a/tcg/loongarch64/tcg-target.c.inc
136
+++ b/tcg/loongarch64/tcg-target.c.inc
137
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_iarg_regs[] = {
138
TCG_REG_A7,
139
};
140
141
-static const int tcg_target_call_oarg_regs[] = {
142
- TCG_REG_A0,
143
- TCG_REG_A1,
144
-};
145
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
146
+{
147
+ tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
148
+ tcg_debug_assert(slot >= 0 && slot <= 1);
149
+ return TCG_REG_A0 + slot;
150
+}
151
152
#ifndef CONFIG_SOFTMMU
153
#define USE_GUEST_BASE (guest_base != 0)
154
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
155
index XXXXXXX..XXXXXXX 100644
156
--- a/tcg/mips/tcg-target.c.inc
157
+++ b/tcg/mips/tcg-target.c.inc
158
@@ -XXX,XX +XXX,XX @@ static const TCGReg tcg_target_call_iarg_regs[] = {
159
#endif
160
};
161
162
-static const TCGReg tcg_target_call_oarg_regs[2] = {
163
- TCG_REG_V0,
164
- TCG_REG_V1
165
-};
166
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
167
+{
168
+ tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
169
+ tcg_debug_assert(slot >= 0 && slot <= 1);
170
+ return TCG_REG_V0 + slot;
171
+}
172
173
static const tcg_insn_unit *tb_ret_addr;
174
static const tcg_insn_unit *bswap32_addr;
175
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
176
index XXXXXXX..XXXXXXX 100644
177
--- a/tcg/ppc/tcg-target.c.inc
178
+++ b/tcg/ppc/tcg-target.c.inc
179
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_iarg_regs[] = {
180
TCG_REG_R10
181
};
182
183
-static const int tcg_target_call_oarg_regs[] = {
184
- TCG_REG_R3,
185
- TCG_REG_R4
186
-};
187
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
188
+{
189
+ tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
190
+ tcg_debug_assert(slot >= 0 && slot <= 1);
191
+ return TCG_REG_R3 + slot;
192
+}
193
194
static const int tcg_target_callee_save_regs[] = {
195
#ifdef _CALL_DARWIN
196
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
197
index XXXXXXX..XXXXXXX 100644
198
--- a/tcg/riscv/tcg-target.c.inc
199
+++ b/tcg/riscv/tcg-target.c.inc
200
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_iarg_regs[] = {
201
TCG_REG_A7,
202
};
203
204
-static const int tcg_target_call_oarg_regs[] = {
205
- TCG_REG_A0,
206
- TCG_REG_A1,
207
-};
208
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
209
+{
210
+ tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
211
+ tcg_debug_assert(slot >= 0 && slot <= 1);
212
+ return TCG_REG_A0 + slot;
213
+}
214
215
#define TCG_CT_CONST_ZERO 0x100
216
#define TCG_CT_CONST_S12 0x200
217
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
218
index XXXXXXX..XXXXXXX 100644
219
--- a/tcg/s390x/tcg-target.c.inc
220
+++ b/tcg/s390x/tcg-target.c.inc
221
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_iarg_regs[] = {
222
TCG_REG_R6,
223
};
224
225
-static const int tcg_target_call_oarg_regs[] = {
226
- TCG_REG_R2,
227
-};
228
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
229
+{
230
+ tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
231
+ tcg_debug_assert(slot == 0);
232
+ return TCG_REG_R2;
233
+}
234
235
#define S390_CC_EQ 8
236
#define S390_CC_LT 4
237
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
238
index XXXXXXX..XXXXXXX 100644
239
--- a/tcg/sparc64/tcg-target.c.inc
240
+++ b/tcg/sparc64/tcg-target.c.inc
241
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_iarg_regs[6] = {
242
TCG_REG_O5,
243
};
244
245
-static const int tcg_target_call_oarg_regs[] = {
246
- TCG_REG_O0,
247
- TCG_REG_O1,
248
- TCG_REG_O2,
249
- TCG_REG_O3,
250
-};
251
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
252
+{
253
+ tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
254
+ tcg_debug_assert(slot >= 0 && slot <= 3);
255
+ return TCG_REG_O0 + slot;
256
+}
257
258
#define INSN_OP(x) ((x) << 30)
259
#define INSN_OP2(x) ((x) << 22)
260
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
261
index XXXXXXX..XXXXXXX 100644
262
--- a/tcg/tci/tcg-target.c.inc
263
+++ b/tcg/tci/tcg-target.c.inc
264
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
265
/* No call arguments via registers. All will be stored on the "stack". */
266
static const int tcg_target_call_iarg_regs[] = { };
267
268
-static const int tcg_target_call_oarg_regs[] = {
269
- TCG_REG_R0,
270
-#if TCG_TARGET_REG_BITS == 32
271
- TCG_REG_R1
272
-#endif
273
-};
274
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
275
+{
276
+ tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
277
+ tcg_debug_assert(slot >= 0 && slot < 64 / TCG_TARGET_REG_BITS);
278
+ return TCG_REG_R0 + slot;
279
+}
280
281
#ifdef CONFIG_DEBUG_TCG
282
static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
283
--
284
2.34.1
285
286
diff view generated by jsdifflib
New patch
1
This will be used by _WIN64 to return i128. Not yet used,
2
because allocation is not yet enabled.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/tcg-internal.h | 1 +
8
tcg/tcg.c | 19 +++++++++++++++++++
9
2 files changed, 20 insertions(+)
10
11
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/tcg-internal.h
14
+++ b/tcg/tcg-internal.h
15
@@ -XXX,XX +XXX,XX @@
16
typedef enum {
17
TCG_CALL_RET_NORMAL, /* by registers */
18
TCG_CALL_RET_BY_REF, /* for i128, by reference */
19
+ TCG_CALL_RET_BY_VEC, /* for i128, by vector register */
20
} TCGCallReturnKind;
21
22
typedef enum {
23
diff --git a/tcg/tcg.c b/tcg/tcg.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/tcg/tcg.c
26
+++ b/tcg/tcg.c
27
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
28
/* Query the last register now to trigger any assert early. */
29
tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
30
break;
31
+ case TCG_CALL_RET_BY_VEC:
32
+ /* Query the single register now to trigger any assert early. */
33
+ tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
34
+ break;
35
case TCG_CALL_RET_BY_REF:
36
/*
37
* Allocate the first argument to the output.
38
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
39
}
40
break;
41
42
+ case TCG_CALL_RET_BY_VEC:
43
+ {
44
+ TCGTemp *ts = arg_temp(op->args[0]);
45
+
46
+ tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
47
+ tcg_debug_assert(ts->temp_subindex == 0);
48
+ if (!ts->mem_allocated) {
49
+ temp_allocate_frame(s, ts);
50
+ }
51
+ tcg_out_st(s, TCG_TYPE_V128,
52
+ tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
53
+ ts->mem_base->reg, ts->mem_offset);
54
+ }
55
+ /* fall through to mark all parts in memory */
56
+
57
case TCG_CALL_RET_BY_REF:
58
/* The callee has performed a write through the reference. */
59
for (i = 0; i < nb_oargs; i++) {
60
--
61
2.34.1
62
63
diff view generated by jsdifflib
New patch
1
We are about to allow passing Int128 to/from tcg helper functions,
2
but libffi doesn't support __int128_t, so use the structure.
1
3
4
In order for atomic128.h to continue working, we must provide
5
a mechanism to convert between the real __int128_t and the structure.
6
Provide a new union, Int128Alias, for this. We cannot modify
7
Int128 itself, as any changed alignment would also break libffi.
8
9
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
10
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
13
include/qemu/atomic128.h | 29 +++++++++++++++++++++------
14
include/qemu/int128.h | 25 +++++++++++++++++++++---
15
util/int128.c | 42 ++++++++++++++++++++++++++++++++++++++++
16
3 files changed, 87 insertions(+), 9 deletions(-)
17
18
diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/include/qemu/atomic128.h
21
+++ b/include/qemu/atomic128.h
22
@@ -XXX,XX +XXX,XX @@
23
#if defined(CONFIG_ATOMIC128)
24
static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
25
{
26
- return qatomic_cmpxchg__nocheck(ptr, cmp, new);
27
+ Int128Alias r, c, n;
28
+
29
+ c.s = cmp;
30
+ n.s = new;
31
+ r.i = qatomic_cmpxchg__nocheck((__int128_t *)ptr, c.i, n.i);
32
+ return r.s;
33
}
34
# define HAVE_CMPXCHG128 1
35
#elif defined(CONFIG_CMPXCHG128)
36
static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
37
{
38
- return __sync_val_compare_and_swap_16(ptr, cmp, new);
39
+ Int128Alias r, c, n;
40
+
41
+ c.s = cmp;
42
+ n.s = new;
43
+ r.i = __sync_val_compare_and_swap_16((__int128_t *)ptr, c.i, n.i);
44
+ return r.s;
45
}
46
# define HAVE_CMPXCHG128 1
47
#elif defined(__aarch64__)
48
@@ -XXX,XX +XXX,XX @@ Int128 QEMU_ERROR("unsupported atomic")
49
#if defined(CONFIG_ATOMIC128)
50
static inline Int128 atomic16_read(Int128 *ptr)
51
{
52
- return qatomic_read__nocheck(ptr);
53
+ Int128Alias r;
54
+
55
+ r.i = qatomic_read__nocheck((__int128_t *)ptr);
56
+ return r.s;
57
}
58
59
static inline void atomic16_set(Int128 *ptr, Int128 val)
60
{
61
- qatomic_set__nocheck(ptr, val);
62
+ Int128Alias v;
63
+
64
+ v.s = val;
65
+ qatomic_set__nocheck((__int128_t *)ptr, v.i);
66
}
67
68
# define HAVE_ATOMIC128 1
69
@@ -XXX,XX +XXX,XX @@ static inline void atomic16_set(Int128 *ptr, Int128 val)
70
static inline Int128 atomic16_read(Int128 *ptr)
71
{
72
/* Maybe replace 0 with 0, returning the old value. */
73
- return atomic16_cmpxchg(ptr, 0, 0);
74
+ Int128 z = int128_make64(0);
75
+ return atomic16_cmpxchg(ptr, z, z);
76
}
77
78
static inline void atomic16_set(Int128 *ptr, Int128 val)
79
@@ -XXX,XX +XXX,XX @@ static inline void atomic16_set(Int128 *ptr, Int128 val)
80
do {
81
cmp = old;
82
old = atomic16_cmpxchg(ptr, cmp, val);
83
- } while (old != cmp);
84
+ } while (int128_ne(old, cmp));
85
}
86
87
# define HAVE_ATOMIC128 1
88
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
89
index XXXXXXX..XXXXXXX 100644
90
--- a/include/qemu/int128.h
91
+++ b/include/qemu/int128.h
92
@@ -XXX,XX +XXX,XX @@
93
94
#include "qemu/bswap.h"
95
96
-#ifdef CONFIG_INT128
97
+/*
98
+ * With TCI, we need to use libffi for interfacing with TCG helpers.
99
+ * But libffi does not support __int128_t, and therefore cannot pass
100
+ * or return values of this type; force use of the Int128 struct.
101
+ */
102
+#if defined(CONFIG_INT128) && !defined(CONFIG_TCG_INTERPRETER)
103
typedef __int128_t Int128;
104
105
static inline Int128 int128_make64(uint64_t a)
106
@@ -XXX,XX +XXX,XX @@ Int128 int128_divu(Int128, Int128);
107
Int128 int128_remu(Int128, Int128);
108
Int128 int128_divs(Int128, Int128);
109
Int128 int128_rems(Int128, Int128);
110
-
111
-#endif /* CONFIG_INT128 */
112
+#endif /* CONFIG_INT128 && !CONFIG_TCG_INTERPRETER */
113
114
static inline void bswap128s(Int128 *s)
115
{
116
@@ -XXX,XX +XXX,XX @@ static inline void bswap128s(Int128 *s)
117
#define INT128_MAX int128_make128(UINT64_MAX, INT64_MAX)
118
#define INT128_MIN int128_make128(0, INT64_MIN)
119
120
+/*
121
+ * When the compiler supports a 128-bit type, define a combination of
122
+ * a possible structure and the native types. Ease parameter passing
123
+ * via use of the transparent union extension.
124
+ */
125
+#ifdef CONFIG_INT128
126
+typedef union {
127
+ Int128 s;
128
+ __int128_t i;
129
+ __uint128_t u;
130
+} Int128Alias __attribute__((transparent_union));
131
+#else
132
+typedef Int128 Int128Alias;
133
+#endif /* CONFIG_INT128 */
134
+
135
#endif /* INT128_H */
136
diff --git a/util/int128.c b/util/int128.c
137
index XXXXXXX..XXXXXXX 100644
138
--- a/util/int128.c
139
+++ b/util/int128.c
140
@@ -XXX,XX +XXX,XX @@ Int128 int128_rems(Int128 a, Int128 b)
141
return r;
142
}
143
144
+#elif defined(CONFIG_TCG_INTERPRETER)
145
+
146
+Int128 int128_divu(Int128 a_s, Int128 b_s)
147
+{
148
+ Int128Alias r, a, b;
149
+
150
+ a.s = a_s;
151
+ b.s = b_s;
152
+ r.u = a.u / b.u;
153
+ return r.s;
154
+}
155
+
156
+Int128 int128_remu(Int128 a_s, Int128 b_s)
157
+{
158
+ Int128Alias r, a, b;
159
+
160
+ a.s = a_s;
161
+ b.s = b_s;
162
+ r.u = a.u % b.u;
163
+ return r.s;
164
+}
165
+
166
+Int128 int128_divs(Int128 a_s, Int128 b_s)
167
+{
168
+ Int128Alias r, a, b;
169
+
170
+ a.s = a_s;
171
+ b.s = b_s;
172
+ r.i = a.i / b.i;
173
+ return r.s;
174
+}
175
+
176
+Int128 int128_rems(Int128 a_s, Int128 b_s)
177
+{
178
+ Int128Alias r, a, b;
179
+
180
+ a.s = a_s;
181
+ b.s = b_s;
182
+ r.i = a.i % b.i;
183
+ return r.s;
184
+}
185
+
186
#endif
187
--
188
2.34.1
189
190
diff view generated by jsdifflib
New patch
1
Fill in the parameters for the host ABI for Int128.
2
Adjust tcg_target_call_oarg_reg for _WIN64, and
3
tcg_out_call for i386 sysv. Allow TCG_TYPE_V128
4
stores without AVX enabled.
1
5
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/i386/tcg-target.h | 10 ++++++++++
10
tcg/i386/tcg-target.c.inc | 30 +++++++++++++++++++++++++++++-
11
2 files changed, 39 insertions(+), 1 deletion(-)
12
13
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/i386/tcg-target.h
16
+++ b/tcg/i386/tcg-target.h
17
@@ -XXX,XX +XXX,XX @@ typedef enum {
18
#endif
19
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
20
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
21
+#if defined(_WIN64)
22
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF
23
+# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_VEC
24
+#elif TCG_TARGET_REG_BITS == 64
25
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
26
+# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
27
+#else
28
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
29
+# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
30
+#endif
31
32
extern bool have_bmi1;
33
extern bool have_popcnt;
34
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
35
index XXXXXXX..XXXXXXX 100644
36
--- a/tcg/i386/tcg-target.c.inc
37
+++ b/tcg/i386/tcg-target.c.inc
38
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
39
case TCG_CALL_RET_NORMAL:
40
tcg_debug_assert(slot >= 0 && slot <= 1);
41
return slot ? TCG_REG_EDX : TCG_REG_EAX;
42
+#ifdef _WIN64
43
+ case TCG_CALL_RET_BY_VEC:
44
+ tcg_debug_assert(slot == 0);
45
+ return TCG_REG_XMM0;
46
+#endif
47
default:
48
g_assert_not_reached();
49
}
50
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
51
* The gvec infrastructure is asserts that v128 vector loads
52
* and stores use a 16-byte aligned offset. Validate that the
53
* final pointer is aligned by using an insn that will SIGSEGV.
54
+ *
55
+ * This specific instance is also used by TCG_CALL_RET_BY_VEC,
56
+ * for _WIN64, which must have SSE2 but may not have AVX.
57
*/
58
tcg_debug_assert(arg >= 16);
59
- tcg_out_vex_modrm_offset(s, OPC_MOVDQA_WxVx, arg, 0, arg1, arg2);
60
+ if (have_avx1) {
61
+ tcg_out_vex_modrm_offset(s, OPC_MOVDQA_WxVx, arg, 0, arg1, arg2);
62
+ } else {
63
+ tcg_out_modrm_offset(s, OPC_MOVDQA_WxVx, arg, arg1, arg2);
64
+ }
65
break;
66
case TCG_TYPE_V256:
67
/*
68
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
69
const TCGHelperInfo *info)
70
{
71
tcg_out_branch(s, 1, dest);
72
+
73
+#ifndef _WIN32
74
+ if (TCG_TARGET_REG_BITS == 32 && info->out_kind == TCG_CALL_RET_BY_REF) {
75
+ /*
76
+ * The sysv i386 abi for struct return places a reference as the
77
+ * first argument of the stack, and pops that argument with the
78
+ * return statement. Since we want to retain the aligned stack
79
+ * pointer for the callee, we do not want to actually push that
80
+ * argument before the call but rely on the normal store to the
81
+ * stack slot. But we do need to compensate for the pop in order
82
+ * to reset our correct stack pointer value.
83
+ * Pushing a garbage value back onto the stack is quickest.
84
+ */
85
+ tcg_out_push(s, TCG_REG_EAX);
86
+ }
87
+#endif
88
}
89
90
static void tcg_out_jmp(TCGContext *s, const tcg_insn_unit *dest)
91
--
92
2.34.1
93
94
diff view generated by jsdifflib
New patch
1
We expect the backend to require register pairs in
2
host-endian ordering, thus for big-endian the first
3
register of a pair contains the high part.
4
We were forcing R0 to contain the low part for calls.
1
5
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
10
tcg/tci.c | 21 +++++++++++----------
11
1 file changed, 11 insertions(+), 10 deletions(-)
12
13
diff --git a/tcg/tci.c b/tcg/tci.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/tci.c
16
+++ b/tcg/tci.c
17
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
18
ffi_call(pptr[1], pptr[0], stack, call_slots);
19
}
20
21
- /* Any result winds up "left-aligned" in the stack[0] slot. */
22
switch (len) {
23
case 0: /* void */
24
break;
25
case 1: /* uint32_t */
26
/*
27
+ * The result winds up "left-aligned" in the stack[0] slot.
28
* Note that libffi has an odd special case in that it will
29
* always widen an integral result to ffi_arg.
30
*/
31
- if (sizeof(ffi_arg) == 4) {
32
- regs[TCG_REG_R0] = *(uint32_t *)stack;
33
- break;
34
- }
35
- /* fall through */
36
- case 2: /* uint64_t */
37
- if (TCG_TARGET_REG_BITS == 32) {
38
- tci_write_reg64(regs, TCG_REG_R1, TCG_REG_R0, stack[0]);
39
+ if (sizeof(ffi_arg) == 8) {
40
+ regs[TCG_REG_R0] = (uint32_t)stack[0];
41
} else {
42
- regs[TCG_REG_R0] = stack[0];
43
+ regs[TCG_REG_R0] = *(uint32_t *)stack;
44
}
45
break;
46
+ case 2: /* uint64_t */
47
+ /*
48
+ * For TCG_TARGET_REG_BITS == 32, the register pair
49
+ * must stay in host memory order.
50
+ */
51
+ memcpy(&regs[TCG_REG_R0], stack, 8);
52
+ break;
53
default:
54
g_assert_not_reached();
55
}
56
--
57
2.34.1
58
59
diff view generated by jsdifflib
New patch
1
Fill in the parameters for libffi for Int128.
2
Adjust the interpreter to allow for 16-byte return values.
3
Adjust tcg_out_call to record the return value length.
1
4
5
Call parameters are no longer all the same size, so we
6
cannot reuse the same call_slots array for every function.
7
Compute it each time now, but only fill in slots required
8
for the call we're about to make.
9
10
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
13
tcg/tci/tcg-target.h | 3 +++
14
tcg/tcg.c | 19 +++++++++++++++++
15
tcg/tci.c | 44 ++++++++++++++++++++--------------------
16
tcg/tci/tcg-target.c.inc | 10 ++++-----
17
4 files changed, 49 insertions(+), 27 deletions(-)
18
19
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
20
index XXXXXXX..XXXXXXX 100644
21
--- a/tcg/tci/tcg-target.h
22
+++ b/tcg/tci/tcg-target.h
23
@@ -XXX,XX +XXX,XX @@ typedef enum {
24
#if TCG_TARGET_REG_BITS == 32
25
# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EVEN
26
# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
27
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
28
#else
29
# define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
30
# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
31
+# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
32
#endif
33
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
34
35
#define HAVE_TCG_QEMU_TB_EXEC
36
#define TCG_TARGET_NEED_POOL_LABELS
37
diff --git a/tcg/tcg.c b/tcg/tcg.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/tcg/tcg.c
40
+++ b/tcg/tcg.c
41
@@ -XXX,XX +XXX,XX @@ static GHashTable *helper_table;
42
#ifdef CONFIG_TCG_INTERPRETER
43
static ffi_type *typecode_to_ffi(int argmask)
44
{
45
+ /*
46
+ * libffi does not support __int128_t, so we have forced Int128
47
+ * to use the structure definition instead of the builtin type.
48
+ */
49
+ static ffi_type *ffi_type_i128_elements[3] = {
50
+ &ffi_type_uint64,
51
+ &ffi_type_uint64,
52
+ NULL
53
+ };
54
+ static ffi_type ffi_type_i128 = {
55
+ .size = 16,
56
+ .alignment = __alignof__(Int128),
57
+ .type = FFI_TYPE_STRUCT,
58
+ .elements = ffi_type_i128_elements,
59
+ };
60
+
61
switch (argmask) {
62
case dh_typecode_void:
63
return &ffi_type_void;
64
@@ -XXX,XX +XXX,XX @@ static ffi_type *typecode_to_ffi(int argmask)
65
return &ffi_type_sint64;
66
case dh_typecode_ptr:
67
return &ffi_type_pointer;
68
+ case dh_typecode_i128:
69
+ return &ffi_type_i128;
70
}
71
g_assert_not_reached();
72
}
73
@@ -XXX,XX +XXX,XX @@ static void init_ffi_layouts(void)
74
/* Ignoring the return type, find the last non-zero field. */
75
nargs = 32 - clz32(typemask >> 3);
76
nargs = DIV_ROUND_UP(nargs, 3);
77
+ assert(nargs <= MAX_CALL_IARGS);
78
79
ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
80
ca->cif.rtype = typecode_to_ffi(typemask & 7);
81
diff --git a/tcg/tci.c b/tcg/tci.c
82
index XXXXXXX..XXXXXXX 100644
83
--- a/tcg/tci.c
84
+++ b/tcg/tci.c
85
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
86
tcg_target_ulong regs[TCG_TARGET_NB_REGS];
87
uint64_t stack[(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE)
88
/ sizeof(uint64_t)];
89
- void *call_slots[TCG_STATIC_CALL_ARGS_SIZE / sizeof(uint64_t)];
90
91
regs[TCG_AREG0] = (tcg_target_ulong)env;
92
regs[TCG_REG_CALL_STACK] = (uintptr_t)stack;
93
- /* Other call_slots entries initialized at first use (see below). */
94
- call_slots[0] = NULL;
95
tci_assert(tb_ptr);
96
97
for (;;) {
98
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
99
100
switch (opc) {
101
case INDEX_op_call:
102
- /*
103
- * Set up the ffi_avalue array once, delayed until now
104
- * because many TB's do not make any calls. In tcg_gen_callN,
105
- * we arranged for every real argument to be "left-aligned"
106
- * in each 64-bit slot.
107
- */
108
- if (unlikely(call_slots[0] == NULL)) {
109
- for (int i = 0; i < ARRAY_SIZE(call_slots); ++i) {
110
- call_slots[i] = &stack[i];
111
- }
112
- }
113
-
114
- tci_args_nl(insn, tb_ptr, &len, &ptr);
115
-
116
- /* Helper functions may need to access the "return address" */
117
- tci_tb_ptr = (uintptr_t)tb_ptr;
118
-
119
{
120
- void **pptr = ptr;
121
- ffi_call(pptr[1], pptr[0], stack, call_slots);
122
+ void *call_slots[MAX_CALL_IARGS];
123
+ ffi_cif *cif;
124
+ void *func;
125
+ unsigned i, s, n;
126
+
127
+ tci_args_nl(insn, tb_ptr, &len, &ptr);
128
+ func = ((void **)ptr)[0];
129
+ cif = ((void **)ptr)[1];
130
+
131
+ n = cif->nargs;
132
+ for (i = s = 0; i < n; ++i) {
133
+ ffi_type *t = cif->arg_types[i];
134
+ call_slots[i] = &stack[s];
135
+ s += DIV_ROUND_UP(t->size, 8);
136
+ }
137
+
138
+ /* Helper functions may need to access the "return address" */
139
+ tci_tb_ptr = (uintptr_t)tb_ptr;
140
+ ffi_call(cif, func, stack, call_slots);
141
}
142
143
switch (len) {
144
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
145
*/
146
memcpy(&regs[TCG_REG_R0], stack, 8);
147
break;
148
+ case 3: /* Int128 */
149
+ memcpy(&regs[TCG_REG_R0], stack, 16);
150
+ break;
151
default:
152
g_assert_not_reached();
153
}
154
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
155
index XXXXXXX..XXXXXXX 100644
156
--- a/tcg/tci/tcg-target.c.inc
157
+++ b/tcg/tci/tcg-target.c.inc
158
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_iarg_regs[] = { };
159
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
160
{
161
tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
162
- tcg_debug_assert(slot >= 0 && slot < 64 / TCG_TARGET_REG_BITS);
163
+ tcg_debug_assert(slot >= 0 && slot < 128 / TCG_TARGET_REG_BITS);
164
return TCG_REG_R0 + slot;
165
}
166
167
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func,
168
169
if (cif->rtype == &ffi_type_void) {
170
which = 0;
171
- } else if (cif->rtype->size == 4) {
172
- which = 1;
173
} else {
174
- tcg_debug_assert(cif->rtype->size == 8);
175
- which = 2;
176
+ tcg_debug_assert(cif->rtype->size == 4 ||
177
+ cif->rtype->size == 8 ||
178
+ cif->rtype->size == 16);
179
+ which = ctz32(cif->rtype->size) - 1;
180
}
181
new_pool_l2(s, 20, s->code_ptr, 0, (uintptr_t)func, (uintptr_t)cif);
182
insn = deposit32(insn, 0, 8, INDEX_op_call);
183
--
184
2.34.1
185
186
diff view generated by jsdifflib
New patch
1
Fill in the parameters for the host ABI for Int128 for
2
those backends which require no extra modification.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/aarch64/tcg-target.h | 2 ++
9
tcg/arm/tcg-target.h | 2 ++
10
tcg/loongarch64/tcg-target.h | 2 ++
11
tcg/mips/tcg-target.h | 2 ++
12
tcg/riscv/tcg-target.h | 3 +++
13
tcg/s390x/tcg-target.h | 2 ++
14
tcg/sparc64/tcg-target.h | 2 ++
15
tcg/tcg.c | 6 +++---
16
tcg/ppc/tcg-target.c.inc | 3 +++
17
9 files changed, 21 insertions(+), 3 deletions(-)
18
19
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
20
index XXXXXXX..XXXXXXX 100644
21
--- a/tcg/aarch64/tcg-target.h
22
+++ b/tcg/aarch64/tcg-target.h
23
@@ -XXX,XX +XXX,XX @@ typedef enum {
24
#define TCG_TARGET_CALL_STACK_OFFSET 0
25
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
26
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
27
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
28
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
29
30
/* optional instructions */
31
#define TCG_TARGET_HAS_div_i32 1
32
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tcg/arm/tcg-target.h
35
+++ b/tcg/arm/tcg-target.h
36
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
37
#define TCG_TARGET_CALL_STACK_OFFSET    0
38
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
39
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
40
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
41
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
42
43
/* optional instructions */
44
#define TCG_TARGET_HAS_ext8s_i32 1
45
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
46
index XXXXXXX..XXXXXXX 100644
47
--- a/tcg/loongarch64/tcg-target.h
48
+++ b/tcg/loongarch64/tcg-target.h
49
@@ -XXX,XX +XXX,XX @@ typedef enum {
50
#define TCG_TARGET_CALL_STACK_OFFSET 0
51
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
52
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
53
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
54
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
55
56
/* optional instructions */
57
#define TCG_TARGET_HAS_movcond_i32 1
58
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
59
index XXXXXXX..XXXXXXX 100644
60
--- a/tcg/mips/tcg-target.h
61
+++ b/tcg/mips/tcg-target.h
62
@@ -XXX,XX +XXX,XX @@ typedef enum {
63
# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
64
#endif
65
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
66
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
67
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
68
69
/* MOVN/MOVZ instructions detection */
70
#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
71
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
72
index XXXXXXX..XXXXXXX 100644
73
--- a/tcg/riscv/tcg-target.h
74
+++ b/tcg/riscv/tcg-target.h
75
@@ -XXX,XX +XXX,XX @@ typedef enum {
76
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL
77
#if TCG_TARGET_REG_BITS == 32
78
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN
79
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
80
#else
81
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
82
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
83
#endif
84
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
85
86
/* optional instructions */
87
#define TCG_TARGET_HAS_movcond_i32 0
88
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
89
index XXXXXXX..XXXXXXX 100644
90
--- a/tcg/s390x/tcg-target.h
91
+++ b/tcg/s390x/tcg-target.h
92
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
93
#define TCG_TARGET_CALL_STACK_OFFSET    160
94
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND
95
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
96
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF
97
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
98
99
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
100
101
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
102
index XXXXXXX..XXXXXXX 100644
103
--- a/tcg/sparc64/tcg-target.h
104
+++ b/tcg/sparc64/tcg-target.h
105
@@ -XXX,XX +XXX,XX @@ typedef enum {
106
#define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS)
107
#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND
108
#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
109
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
110
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
111
112
#if defined(__VIS__) && __VIS__ >= 0x300
113
#define use_vis3_instructions 1
114
diff --git a/tcg/tcg.c b/tcg/tcg.c
115
index XXXXXXX..XXXXXXX 100644
116
--- a/tcg/tcg.c
117
+++ b/tcg/tcg.c
118
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
119
break;
120
case dh_typecode_i128:
121
info->nr_out = 128 / TCG_TARGET_REG_BITS;
122
- info->out_kind = TCG_CALL_RET_NORMAL; /* TODO */
123
- switch (/* TODO */ TCG_CALL_RET_NORMAL) {
124
+ info->out_kind = TCG_TARGET_CALL_RET_I128;
125
+ switch (TCG_TARGET_CALL_RET_I128) {
126
case TCG_CALL_RET_NORMAL:
127
/* Query the last register now to trigger any assert early. */
128
tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
129
@@ -XXX,XX +XXX,XX @@ static void init_call_layout(TCGHelperInfo *info)
130
break;
131
132
case TCG_TYPE_I128:
133
- switch (/* TODO */ TCG_CALL_ARG_NORMAL) {
134
+ switch (TCG_TARGET_CALL_ARG_I128) {
135
case TCG_CALL_ARG_EVEN:
136
layout_arg_even(&cum);
137
/* fall through */
138
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
139
index XXXXXXX..XXXXXXX 100644
140
--- a/tcg/ppc/tcg-target.c.inc
141
+++ b/tcg/ppc/tcg-target.c.inc
142
@@ -XXX,XX +XXX,XX @@
143
#else
144
# define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL
145
#endif
146
+/* Note sysv arg alignment applies only to 2-word types, not more. */
147
+#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL
148
+#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
149
150
/* For some memory operations, we need a scratch that isn't R0. For the AIX
151
calling convention, we can re-use the TOC register since we'll be reloading
152
--
153
2.34.1
154
155
diff view generated by jsdifflib
1
Similar to tcg_const_ptr, defer to tcg_constant_{i32,i64}.
1
This enables allocation of i128. The type is not yet
2
usable, as we have not yet added data movement ops.
2
3
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
7
---
6
include/tcg/tcg.h | 2 ++
8
include/tcg/tcg.h | 32 +++++++++++++++++++++++++
7
1 file changed, 2 insertions(+)
9
tcg/tcg.c | 60 +++++++++++++++++++++++++++++++++--------------
10
2 files changed, 74 insertions(+), 18 deletions(-)
8
11
9
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
12
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
10
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
11
--- a/include/tcg/tcg.h
14
--- a/include/tcg/tcg.h
12
+++ b/include/tcg/tcg.h
15
+++ b/include/tcg/tcg.h
13
@@ -XXX,XX +XXX,XX @@ TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val);
16
@@ -XXX,XX +XXX,XX @@ static inline TCGTemp *tcgv_i64_temp(TCGv_i64 v)
14
#if UINTPTR_MAX == UINT32_MAX
17
return tcgv_i32_temp((TCGv_i32)v);
15
# define tcg_const_ptr(x) ((TCGv_ptr)tcg_const_i32((intptr_t)(x)))
18
}
16
# define tcg_const_local_ptr(x) ((TCGv_ptr)tcg_const_local_i32((intptr_t)(x)))
19
17
+# define tcg_constant_ptr(x) ((TCGv_ptr)tcg_constant_i32((intptr_t)(x)))
20
+static inline TCGTemp *tcgv_i128_temp(TCGv_i128 v)
18
#else
21
+{
19
# define tcg_const_ptr(x) ((TCGv_ptr)tcg_const_i64((intptr_t)(x)))
22
+ return tcgv_i32_temp((TCGv_i32)v);
20
# define tcg_const_local_ptr(x) ((TCGv_ptr)tcg_const_local_i64((intptr_t)(x)))
23
+}
21
+# define tcg_constant_ptr(x) ((TCGv_ptr)tcg_constant_i64((intptr_t)(x)))
24
+
22
#endif
25
static inline TCGTemp *tcgv_ptr_temp(TCGv_ptr v)
23
26
{
24
TCGLabel *gen_new_label(void);
27
return tcgv_i32_temp((TCGv_i32)v);
28
@@ -XXX,XX +XXX,XX @@ static inline TCGArg tcgv_i64_arg(TCGv_i64 v)
29
return temp_arg(tcgv_i64_temp(v));
30
}
31
32
+static inline TCGArg tcgv_i128_arg(TCGv_i128 v)
33
+{
34
+ return temp_arg(tcgv_i128_temp(v));
35
+}
36
+
37
static inline TCGArg tcgv_ptr_arg(TCGv_ptr v)
38
{
39
return temp_arg(tcgv_ptr_temp(v));
40
@@ -XXX,XX +XXX,XX @@ static inline TCGv_i64 temp_tcgv_i64(TCGTemp *t)
41
return (TCGv_i64)temp_tcgv_i32(t);
42
}
43
44
+static inline TCGv_i128 temp_tcgv_i128(TCGTemp *t)
45
+{
46
+ return (TCGv_i128)temp_tcgv_i32(t);
47
+}
48
+
49
static inline TCGv_ptr temp_tcgv_ptr(TCGTemp *t)
50
{
51
return (TCGv_ptr)temp_tcgv_i32(t);
52
@@ -XXX,XX +XXX,XX @@ static inline void tcg_temp_free_i64(TCGv_i64 arg)
53
tcg_temp_free_internal(tcgv_i64_temp(arg));
54
}
55
56
+static inline void tcg_temp_free_i128(TCGv_i128 arg)
57
+{
58
+ tcg_temp_free_internal(tcgv_i128_temp(arg));
59
+}
60
+
61
static inline void tcg_temp_free_ptr(TCGv_ptr arg)
62
{
63
tcg_temp_free_internal(tcgv_ptr_temp(arg));
64
@@ -XXX,XX +XXX,XX @@ static inline TCGv_i64 tcg_temp_local_new_i64(void)
65
return temp_tcgv_i64(t);
66
}
67
68
+static inline TCGv_i128 tcg_temp_new_i128(void)
69
+{
70
+ TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I128, false);
71
+ return temp_tcgv_i128(t);
72
+}
73
+
74
+static inline TCGv_i128 tcg_temp_local_new_i128(void)
75
+{
76
+ TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I128, true);
77
+ return temp_tcgv_i128(t);
78
+}
79
+
80
static inline TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t offset,
81
const char *name)
82
{
83
diff --git a/tcg/tcg.c b/tcg/tcg.c
84
index XXXXXXX..XXXXXXX 100644
85
--- a/tcg/tcg.c
86
+++ b/tcg/tcg.c
87
@@ -XXX,XX +XXX,XX @@ TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
88
tcg_debug_assert(ts->base_type == type);
89
tcg_debug_assert(ts->kind == kind);
90
} else {
91
+ int i, n;
92
+
93
+ switch (type) {
94
+ case TCG_TYPE_I32:
95
+ case TCG_TYPE_V64:
96
+ case TCG_TYPE_V128:
97
+ case TCG_TYPE_V256:
98
+ n = 1;
99
+ break;
100
+ case TCG_TYPE_I64:
101
+ n = 64 / TCG_TARGET_REG_BITS;
102
+ break;
103
+ case TCG_TYPE_I128:
104
+ n = 128 / TCG_TARGET_REG_BITS;
105
+ break;
106
+ default:
107
+ g_assert_not_reached();
108
+ }
109
+
110
ts = tcg_temp_alloc(s);
111
- if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
112
- TCGTemp *ts2 = tcg_temp_alloc(s);
113
+ ts->base_type = type;
114
+ ts->temp_allocated = 1;
115
+ ts->kind = kind;
116
117
- ts->base_type = type;
118
- ts->type = TCG_TYPE_I32;
119
- ts->temp_allocated = 1;
120
- ts->kind = kind;
121
-
122
- tcg_debug_assert(ts2 == ts + 1);
123
- ts2->base_type = TCG_TYPE_I64;
124
- ts2->type = TCG_TYPE_I32;
125
- ts2->temp_allocated = 1;
126
- ts2->temp_subindex = 1;
127
- ts2->kind = kind;
128
- } else {
129
- ts->base_type = type;
130
+ if (n == 1) {
131
ts->type = type;
132
- ts->temp_allocated = 1;
133
- ts->kind = kind;
134
+ } else {
135
+ ts->type = TCG_TYPE_REG;
136
+
137
+ for (i = 1; i < n; ++i) {
138
+ TCGTemp *ts2 = tcg_temp_alloc(s);
139
+
140
+ tcg_debug_assert(ts2 == ts + i);
141
+ ts2->base_type = type;
142
+ ts2->type = TCG_TYPE_REG;
143
+ ts2->temp_allocated = 1;
144
+ ts2->temp_subindex = i;
145
+ ts2->kind = kind;
146
+ }
147
}
148
}
149
150
@@ -XXX,XX +XXX,XX @@ static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
151
case TCG_TYPE_V64:
152
align = 8;
153
break;
154
+ case TCG_TYPE_I128:
155
case TCG_TYPE_V128:
156
case TCG_TYPE_V256:
157
- /* Note that we do not require aligned storage for V256. */
158
+ /*
159
+ * Note that we do not require aligned storage for V256,
160
+ * and that we provide alignment for I128 to match V128,
161
+ * even if that's above what the host ABI requires.
162
+ */
163
align = 16;
164
break;
165
default:
25
--
166
--
26
2.34.1
167
2.34.1
27
168
28
169
diff view generated by jsdifflib
New patch
1
Add code generation functions for data movement between
2
TCGv_i128 (mov) and to/from TCGv_i64 (concat, extract).
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
include/tcg/tcg-op.h | 4 ++++
9
tcg/tcg-internal.h | 13 +++++++++++++
10
tcg/tcg-op.c | 20 ++++++++++++++++++++
11
3 files changed, 37 insertions(+)
12
13
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/tcg/tcg-op.h
16
+++ b/include/tcg/tcg-op.h
17
@@ -XXX,XX +XXX,XX @@ void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg);
18
void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg);
19
void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg);
20
21
+void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src);
22
+void tcg_gen_extr_i128_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i128 arg);
23
+void tcg_gen_concat_i64_i128(TCGv_i128 ret, TCGv_i64 lo, TCGv_i64 hi);
24
+
25
static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi)
26
{
27
tcg_gen_deposit_i64(ret, lo, hi, 32, 32);
28
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
29
index XXXXXXX..XXXXXXX 100644
30
--- a/tcg/tcg-internal.h
31
+++ b/tcg/tcg-internal.h
32
@@ -XXX,XX +XXX,XX @@ extern TCGv_i32 TCGV_LOW(TCGv_i64) QEMU_ERROR("32-bit code path is reachable");
33
extern TCGv_i32 TCGV_HIGH(TCGv_i64) QEMU_ERROR("32-bit code path is reachable");
34
#endif
35
36
+static inline TCGv_i64 TCGV128_LOW(TCGv_i128 t)
37
+{
38
+ /* For 32-bit, offset by 2, which may then have TCGV_{LOW,HIGH} applied. */
39
+ int o = HOST_BIG_ENDIAN ? 64 / TCG_TARGET_REG_BITS : 0;
40
+ return temp_tcgv_i64(tcgv_i128_temp(t) + o);
41
+}
42
+
43
+static inline TCGv_i64 TCGV128_HIGH(TCGv_i128 t)
44
+{
45
+ int o = HOST_BIG_ENDIAN ? 0 : 64 / TCG_TARGET_REG_BITS;
46
+ return temp_tcgv_i64(tcgv_i128_temp(t) + o);
47
+}
48
+
49
#endif /* TCG_INTERNAL_H */
50
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
51
index XXXXXXX..XXXXXXX 100644
52
--- a/tcg/tcg-op.c
53
+++ b/tcg/tcg-op.c
54
@@ -XXX,XX +XXX,XX @@ void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg)
55
tcg_gen_shri_i64(hi, arg, 32);
56
}
57
58
+void tcg_gen_extr_i128_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i128 arg)
59
+{
60
+ tcg_gen_mov_i64(lo, TCGV128_LOW(arg));
61
+ tcg_gen_mov_i64(hi, TCGV128_HIGH(arg));
62
+}
63
+
64
+void tcg_gen_concat_i64_i128(TCGv_i128 ret, TCGv_i64 lo, TCGv_i64 hi)
65
+{
66
+ tcg_gen_mov_i64(TCGV128_LOW(ret), lo);
67
+ tcg_gen_mov_i64(TCGV128_HIGH(ret), hi);
68
+}
69
+
70
+void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src)
71
+{
72
+ if (dst != src) {
73
+ tcg_gen_mov_i64(TCGV128_LOW(dst), TCGV128_LOW(src));
74
+ tcg_gen_mov_i64(TCGV128_HIGH(dst), TCGV128_HIGH(src));
75
+ }
76
+}
77
+
78
/* QEMU specific operations. */
79
80
void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx)
81
--
82
2.34.1
83
84
diff view generated by jsdifflib
1
The last use of this macro was removed in f3e182b10013
1
These are not yet considering atomicity of the 16-byte value;
2
("accel/tcg: Push trace info building into atomic_common.c.inc")
2
this is a direct replacement for the current target code which
3
uses a pair of 8-byte operations.
3
4
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
---
7
accel/tcg/cputlb.c | 1 -
8
include/exec/cpu_ldst.h | 10 +++
8
accel/tcg/user-exec.c | 1 -
9
include/tcg/tcg-op.h | 2 +
9
2 files changed, 2 deletions(-)
10
accel/tcg/cputlb.c | 112 +++++++++++++++++++++++++++++++++
11
accel/tcg/user-exec.c | 66 ++++++++++++++++++++
12
tcg/tcg-op.c | 134 ++++++++++++++++++++++++++++++++++++++++
13
5 files changed, 324 insertions(+)
10
14
15
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/exec/cpu_ldst.h
18
+++ b/include/exec/cpu_ldst.h
19
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr ptr,
20
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr ptr,
21
MemOpIdx oi, uintptr_t ra);
22
23
+Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
24
+ MemOpIdx oi, uintptr_t ra);
25
+Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
26
+ MemOpIdx oi, uintptr_t ra);
27
+
28
void cpu_stb_mmu(CPUArchState *env, abi_ptr ptr, uint8_t val,
29
MemOpIdx oi, uintptr_t ra);
30
void cpu_stw_be_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val,
31
@@ -XXX,XX +XXX,XX @@ void cpu_stl_le_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val,
32
void cpu_stq_le_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val,
33
MemOpIdx oi, uintptr_t ra);
34
35
+void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
36
+ MemOpIdx oi, uintptr_t ra);
37
+void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
38
+ MemOpIdx oi, uintptr_t ra);
39
+
40
uint32_t cpu_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr,
41
uint32_t cmpv, uint32_t newv,
42
MemOpIdx oi, uintptr_t retaddr);
43
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
44
index XXXXXXX..XXXXXXX 100644
45
--- a/include/tcg/tcg-op.h
46
+++ b/include/tcg/tcg-op.h
47
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32, TCGv, TCGArg, MemOp);
48
void tcg_gen_qemu_st_i32(TCGv_i32, TCGv, TCGArg, MemOp);
49
void tcg_gen_qemu_ld_i64(TCGv_i64, TCGv, TCGArg, MemOp);
50
void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, MemOp);
51
+void tcg_gen_qemu_ld_i128(TCGv_i128, TCGv, TCGArg, MemOp);
52
+void tcg_gen_qemu_st_i128(TCGv_i128, TCGv, TCGArg, MemOp);
53
54
static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index)
55
{
11
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
56
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
12
index XXXXXXX..XXXXXXX 100644
57
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/cputlb.c
58
--- a/accel/tcg/cputlb.c
14
+++ b/accel/tcg/cputlb.c
59
+++ b/accel/tcg/cputlb.c
60
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
61
return cpu_load_helper(env, addr, oi, ra, helper_le_ldq_mmu);
62
}
63
64
+Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
65
+ MemOpIdx oi, uintptr_t ra)
66
+{
67
+ MemOp mop = get_memop(oi);
68
+ int mmu_idx = get_mmuidx(oi);
69
+ MemOpIdx new_oi;
70
+ unsigned a_bits;
71
+ uint64_t h, l;
72
+
73
+ tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
74
+ a_bits = get_alignment_bits(mop);
75
+
76
+ /* Handle CPU specific unaligned behaviour */
77
+ if (addr & ((1 << a_bits) - 1)) {
78
+ cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
79
+ mmu_idx, ra);
80
+ }
81
+
82
+ /* Construct an unaligned 64-bit replacement MemOpIdx. */
83
+ mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
84
+ new_oi = make_memop_idx(mop, mmu_idx);
85
+
86
+ h = helper_be_ldq_mmu(env, addr, new_oi, ra);
87
+ l = helper_be_ldq_mmu(env, addr + 8, new_oi, ra);
88
+
89
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
90
+ return int128_make128(l, h);
91
+}
92
+
93
+Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
94
+ MemOpIdx oi, uintptr_t ra)
95
+{
96
+ MemOp mop = get_memop(oi);
97
+ int mmu_idx = get_mmuidx(oi);
98
+ MemOpIdx new_oi;
99
+ unsigned a_bits;
100
+ uint64_t h, l;
101
+
102
+ tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
103
+ a_bits = get_alignment_bits(mop);
104
+
105
+ /* Handle CPU specific unaligned behaviour */
106
+ if (addr & ((1 << a_bits) - 1)) {
107
+ cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
108
+ mmu_idx, ra);
109
+ }
110
+
111
+ /* Construct an unaligned 64-bit replacement MemOpIdx. */
112
+ mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
113
+ new_oi = make_memop_idx(mop, mmu_idx);
114
+
115
+ l = helper_le_ldq_mmu(env, addr, new_oi, ra);
116
+ h = helper_le_ldq_mmu(env, addr + 8, new_oi, ra);
117
+
118
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
119
+ return int128_make128(l, h);
120
+}
121
+
122
/*
123
* Store Helpers
124
*/
15
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
125
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
16
glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)
126
cpu_store_helper(env, addr, val, oi, retaddr, helper_le_stq_mmu);
17
127
}
18
#define ATOMIC_MMU_CLEANUP
128
19
-#define ATOMIC_MMU_IDX get_mmuidx(oi)
129
+void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
20
130
+ MemOpIdx oi, uintptr_t ra)
21
#include "atomic_common.c.inc"
131
+{
22
132
+ MemOp mop = get_memop(oi);
133
+ int mmu_idx = get_mmuidx(oi);
134
+ MemOpIdx new_oi;
135
+ unsigned a_bits;
136
+
137
+ tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
138
+ a_bits = get_alignment_bits(mop);
139
+
140
+ /* Handle CPU specific unaligned behaviour */
141
+ if (addr & ((1 << a_bits) - 1)) {
142
+ cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
143
+ mmu_idx, ra);
144
+ }
145
+
146
+ /* Construct an unaligned 64-bit replacement MemOpIdx. */
147
+ mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
148
+ new_oi = make_memop_idx(mop, mmu_idx);
149
+
150
+ helper_be_stq_mmu(env, addr, int128_gethi(val), new_oi, ra);
151
+ helper_be_stq_mmu(env, addr + 8, int128_getlo(val), new_oi, ra);
152
+
153
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
154
+}
155
+
156
+void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
157
+ MemOpIdx oi, uintptr_t ra)
158
+{
159
+ MemOp mop = get_memop(oi);
160
+ int mmu_idx = get_mmuidx(oi);
161
+ MemOpIdx new_oi;
162
+ unsigned a_bits;
163
+
164
+ tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
165
+ a_bits = get_alignment_bits(mop);
166
+
167
+ /* Handle CPU specific unaligned behaviour */
168
+ if (addr & ((1 << a_bits) - 1)) {
169
+ cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
170
+ mmu_idx, ra);
171
+ }
172
+
173
+ /* Construct an unaligned 64-bit replacement MemOpIdx. */
174
+ mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
175
+ new_oi = make_memop_idx(mop, mmu_idx);
176
+
177
+ helper_le_stq_mmu(env, addr, int128_getlo(val), new_oi, ra);
178
+ helper_le_stq_mmu(env, addr + 8, int128_gethi(val), new_oi, ra);
179
+
180
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
181
+}
182
+
183
#include "ldst_common.c.inc"
184
185
/*
23
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
186
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
24
index XXXXXXX..XXXXXXX 100644
187
index XXXXXXX..XXXXXXX 100644
25
--- a/accel/tcg/user-exec.c
188
--- a/accel/tcg/user-exec.c
26
+++ b/accel/tcg/user-exec.c
189
+++ b/accel/tcg/user-exec.c
27
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
190
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
28
#define ATOMIC_NAME(X) \
191
return ret;
29
glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)
192
}
30
#define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0)
193
31
-#define ATOMIC_MMU_IDX MMU_USER_IDX
194
+Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
32
195
+ MemOpIdx oi, uintptr_t ra)
33
#define DATA_SIZE 1
196
+{
34
#include "atomic_template.h"
197
+ void *haddr;
198
+ Int128 ret;
199
+
200
+ validate_memop(oi, MO_128 | MO_BE);
201
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
202
+ memcpy(&ret, haddr, 16);
203
+ clear_helper_retaddr();
204
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
205
+
206
+ if (!HOST_BIG_ENDIAN) {
207
+ ret = bswap128(ret);
208
+ }
209
+ return ret;
210
+}
211
+
212
+Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
213
+ MemOpIdx oi, uintptr_t ra)
214
+{
215
+ void *haddr;
216
+ Int128 ret;
217
+
218
+ validate_memop(oi, MO_128 | MO_LE);
219
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
220
+ memcpy(&ret, haddr, 16);
221
+ clear_helper_retaddr();
222
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
223
+
224
+ if (HOST_BIG_ENDIAN) {
225
+ ret = bswap128(ret);
226
+ }
227
+ return ret;
228
+}
229
+
230
void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
231
MemOpIdx oi, uintptr_t ra)
232
{
233
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
234
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
235
}
236
237
+void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
238
+ Int128 val, MemOpIdx oi, uintptr_t ra)
239
+{
240
+ void *haddr;
241
+
242
+ validate_memop(oi, MO_128 | MO_BE);
243
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
244
+ if (!HOST_BIG_ENDIAN) {
245
+ val = bswap128(val);
246
+ }
247
+ memcpy(haddr, &val, 16);
248
+ clear_helper_retaddr();
249
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
250
+}
251
+
252
+void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr,
253
+ Int128 val, MemOpIdx oi, uintptr_t ra)
254
+{
255
+ void *haddr;
256
+
257
+ validate_memop(oi, MO_128 | MO_LE);
258
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
259
+ if (HOST_BIG_ENDIAN) {
260
+ val = bswap128(val);
261
+ }
262
+ memcpy(haddr, &val, 16);
263
+ clear_helper_retaddr();
264
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
265
+}
266
+
267
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr ptr)
268
{
269
uint32_t ret;
270
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
271
index XXXXXXX..XXXXXXX 100644
272
--- a/tcg/tcg-op.c
273
+++ b/tcg/tcg-op.c
274
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
275
}
276
}
277
278
+static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
279
+{
280
+ MemOp mop_1 = orig, mop_2;
281
+
282
+ tcg_debug_assert((orig & MO_SIZE) == MO_128);
283
+ tcg_debug_assert((orig & MO_SIGN) == 0);
284
+
285
+ /* Use a memory ordering implemented by the host. */
286
+ if (!TCG_TARGET_HAS_MEMORY_BSWAP && (orig & MO_BSWAP)) {
287
+ mop_1 &= ~MO_BSWAP;
288
+ }
289
+
290
+ /* Reduce the size to 64-bit. */
291
+ mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
292
+
293
+ /* Retain the alignment constraints of the original. */
294
+ switch (orig & MO_AMASK) {
295
+ case MO_UNALN:
296
+ case MO_ALIGN_2:
297
+ case MO_ALIGN_4:
298
+ mop_2 = mop_1;
299
+ break;
300
+ case MO_ALIGN_8:
301
+ /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
302
+ mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
303
+ mop_2 = mop_1;
304
+ break;
305
+ case MO_ALIGN:
306
+ /* Second has 8-byte alignment; first has 16-byte alignment. */
307
+ mop_2 = mop_1;
308
+ mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
309
+ break;
310
+ case MO_ALIGN_16:
311
+ case MO_ALIGN_32:
312
+ case MO_ALIGN_64:
313
+ /* Second has 8-byte alignment; first retains original. */
314
+ mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
315
+ break;
316
+ default:
317
+ g_assert_not_reached();
318
+ }
319
+ ret[0] = mop_1;
320
+ ret[1] = mop_2;
321
+}
322
+
323
+void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
324
+{
325
+ MemOp mop[2];
326
+ TCGv addr_p8;
327
+ TCGv_i64 x, y;
328
+
329
+ canonicalize_memop_i128_as_i64(mop, memop);
330
+
331
+ tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
332
+ addr = plugin_prep_mem_callbacks(addr);
333
+
334
+ /* TODO: respect atomicity of the operation. */
335
+ /* TODO: allow the tcg backend to see the whole operation. */
336
+
337
+ /*
338
+ * Since there are no global TCGv_i128, there is no visible state
339
+ * changed if the second load faults. Load directly into the two
340
+ * subwords.
341
+ */
342
+ if ((memop & MO_BSWAP) == MO_LE) {
343
+ x = TCGV128_LOW(val);
344
+ y = TCGV128_HIGH(val);
345
+ } else {
346
+ x = TCGV128_HIGH(val);
347
+ y = TCGV128_LOW(val);
348
+ }
349
+
350
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
351
+
352
+ if ((mop[0] ^ memop) & MO_BSWAP) {
353
+ tcg_gen_bswap64_i64(x, x);
354
+ }
355
+
356
+ addr_p8 = tcg_temp_new();
357
+ tcg_gen_addi_tl(addr_p8, addr, 8);
358
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
359
+ tcg_temp_free(addr_p8);
360
+
361
+ if ((mop[0] ^ memop) & MO_BSWAP) {
362
+ tcg_gen_bswap64_i64(y, y);
363
+ }
364
+
365
+ plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
366
+ QEMU_PLUGIN_MEM_R);
367
+}
368
+
369
+void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
370
+{
371
+ MemOp mop[2];
372
+ TCGv addr_p8;
373
+ TCGv_i64 x, y;
374
+
375
+ canonicalize_memop_i128_as_i64(mop, memop);
376
+
377
+ tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
378
+ addr = plugin_prep_mem_callbacks(addr);
379
+
380
+ /* TODO: respect atomicity of the operation. */
381
+ /* TODO: allow the tcg backend to see the whole operation. */
382
+
383
+ if ((memop & MO_BSWAP) == MO_LE) {
384
+ x = TCGV128_LOW(val);
385
+ y = TCGV128_HIGH(val);
386
+ } else {
387
+ x = TCGV128_HIGH(val);
388
+ y = TCGV128_LOW(val);
389
+ }
390
+
391
+ addr_p8 = tcg_temp_new();
392
+ if ((mop[0] ^ memop) & MO_BSWAP) {
393
+ TCGv_i64 t = tcg_temp_new_i64();
394
+
395
+ tcg_gen_bswap64_i64(t, x);
396
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
397
+ tcg_gen_bswap64_i64(t, y);
398
+ tcg_gen_addi_tl(addr_p8, addr, 8);
399
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
400
+ tcg_temp_free_i64(t);
401
+ } else {
402
+ gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
403
+ tcg_gen_addi_tl(addr_p8, addr, 8);
404
+ gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
405
+ }
406
+ tcg_temp_free(addr_p8);
407
+
408
+ plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
409
+ QEMU_PLUGIN_MEM_W);
410
+}
411
+
412
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
413
{
414
switch (opc & MO_SSIZE) {
35
--
415
--
36
2.34.1
416
2.34.1
417
418
diff view generated by jsdifflib
New patch
1
1
This will allow targets to avoid rolling their own.
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
accel/tcg/tcg-runtime.h | 11 +++++
8
include/tcg/tcg-op.h | 5 +++
9
tcg/tcg-op.c | 85 +++++++++++++++++++++++++++++++++++
10
accel/tcg/atomic_common.c.inc | 45 +++++++++++++++++++
11
4 files changed, 146 insertions(+)
12
13
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/accel/tcg/tcg-runtime.h
16
+++ b/accel/tcg/tcg-runtime.h
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(atomic_cmpxchgq_be, TCG_CALL_NO_WG,
18
DEF_HELPER_FLAGS_5(atomic_cmpxchgq_le, TCG_CALL_NO_WG,
19
i64, env, tl, i64, i64, i32)
20
#endif
21
+#ifdef CONFIG_CMPXCHG128
22
+DEF_HELPER_FLAGS_5(atomic_cmpxchgo_be, TCG_CALL_NO_WG,
23
+ i128, env, tl, i128, i128, i32)
24
+DEF_HELPER_FLAGS_5(atomic_cmpxchgo_le, TCG_CALL_NO_WG,
25
+ i128, env, tl, i128, i128, i32)
26
+#endif
27
+
28
+DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_be, TCG_CALL_NO_WG,
29
+ i128, env, tl, i128, i128, i32)
30
+DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_le, TCG_CALL_NO_WG,
31
+ i128, env, tl, i128, i128, i32)
32
33
#ifdef CONFIG_ATOMIC64
34
#define GEN_ATOMIC_HELPERS(NAME) \
35
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
36
index XXXXXXX..XXXXXXX 100644
37
--- a/include/tcg/tcg-op.h
38
+++ b/include/tcg/tcg-op.h
39
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
40
TCGArg, MemOp);
41
void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
42
TCGArg, MemOp);
43
+void tcg_gen_atomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
44
+ TCGArg, MemOp);
45
+
46
+void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
47
+ TCGArg, MemOp);
48
49
void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
50
void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
51
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/tcg/tcg-op.c
54
+++ b/tcg/tcg-op.c
55
@@ -XXX,XX +XXX,XX @@ typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
56
TCGv_i32, TCGv_i32, TCGv_i32);
57
typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
58
TCGv_i64, TCGv_i64, TCGv_i32);
59
+typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv,
60
+ TCGv_i128, TCGv_i128, TCGv_i32);
61
typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
62
TCGv_i32, TCGv_i32);
63
typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
64
@@ -XXX,XX +XXX,XX @@ typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
65
#else
66
# define WITH_ATOMIC64(X)
67
#endif
68
+#ifdef CONFIG_CMPXCHG128
69
+# define WITH_ATOMIC128(X) X,
70
+#else
71
+# define WITH_ATOMIC128(X)
72
+#endif
73
74
static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
75
[MO_8] = gen_helper_atomic_cmpxchgb,
76
@@ -XXX,XX +XXX,XX @@ static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
77
[MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
78
WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
79
WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
80
+ WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
81
+ WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
82
};
83
84
void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
85
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
86
}
87
}
88
89
+void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
90
+ TCGv_i128 newv, TCGArg idx, MemOp memop)
91
+{
92
+ if (TCG_TARGET_REG_BITS == 32) {
93
+ /* Inline expansion below is simply too large for 32-bit hosts. */
94
+ gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
95
+ ? gen_helper_nonatomic_cmpxchgo_le
96
+ : gen_helper_nonatomic_cmpxchgo_be);
97
+ MemOpIdx oi = make_memop_idx(memop, idx);
98
+
99
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
100
+ tcg_debug_assert((memop & MO_SIGN) == 0);
101
+
102
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
103
+ } else {
104
+ TCGv_i128 oldv = tcg_temp_new_i128();
105
+ TCGv_i128 tmpv = tcg_temp_new_i128();
106
+ TCGv_i64 t0 = tcg_temp_new_i64();
107
+ TCGv_i64 t1 = tcg_temp_new_i64();
108
+ TCGv_i64 z = tcg_constant_i64(0);
109
+
110
+ tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
111
+
112
+ /* Compare i128 */
113
+ tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
114
+ tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
115
+ tcg_gen_or_i64(t0, t0, t1);
116
+
117
+ /* tmpv = equal ? newv : oldv */
118
+ tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
119
+ TCGV128_LOW(newv), TCGV128_LOW(oldv));
120
+ tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
121
+ TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
122
+
123
+ /* Unconditional writeback. */
124
+ tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
125
+ tcg_gen_mov_i128(retv, oldv);
126
+
127
+ tcg_temp_free_i64(t0);
128
+ tcg_temp_free_i64(t1);
129
+ tcg_temp_free_i128(tmpv);
130
+ tcg_temp_free_i128(oldv);
131
+ }
132
+}
133
+
134
+void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
135
+ TCGv_i128 newv, TCGArg idx, MemOp memop)
136
+{
137
+ gen_atomic_cx_i128 gen;
138
+
139
+ if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
140
+ tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
141
+ return;
142
+ }
143
+
144
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
145
+ tcg_debug_assert((memop & MO_SIGN) == 0);
146
+ gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
147
+
148
+ if (gen) {
149
+ MemOpIdx oi = make_memop_idx(memop, idx);
150
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
151
+ return;
152
+ }
153
+
154
+ gen_helper_exit_atomic(cpu_env);
155
+
156
+ /*
157
+ * Produce a result for a well-formed opcode stream. This satisfies
158
+ * liveness for set before used, which happens before this dead code
159
+ * is removed.
160
+ */
161
+ tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
162
+ tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
163
+}
164
+
165
static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
166
TCGArg idx, MemOp memop, bool new_val,
167
void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
168
diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc
169
index XXXXXXX..XXXXXXX 100644
170
--- a/accel/tcg/atomic_common.c.inc
171
+++ b/accel/tcg/atomic_common.c.inc
172
@@ -XXX,XX +XXX,XX @@ CMPXCHG_HELPER(cmpxchgq_be, uint64_t)
173
CMPXCHG_HELPER(cmpxchgq_le, uint64_t)
174
#endif
175
176
+#ifdef CONFIG_CMPXCHG128
177
+CMPXCHG_HELPER(cmpxchgo_be, Int128)
178
+CMPXCHG_HELPER(cmpxchgo_le, Int128)
179
+#endif
180
+
181
#undef CMPXCHG_HELPER
182
183
+Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, target_ulong addr,
184
+ Int128 cmpv, Int128 newv, uint32_t oi)
185
+{
186
+#if TCG_TARGET_REG_BITS == 32
187
+ uintptr_t ra = GETPC();
188
+ Int128 oldv;
189
+
190
+ oldv = cpu_ld16_be_mmu(env, addr, oi, ra);
191
+ if (int128_eq(oldv, cmpv)) {
192
+ cpu_st16_be_mmu(env, addr, newv, oi, ra);
193
+ } else {
194
+ /* Even with comparison failure, still need a write cycle. */
195
+ probe_write(env, addr, 16, get_mmuidx(oi), ra);
196
+ }
197
+ return oldv;
198
+#else
199
+ g_assert_not_reached();
200
+#endif
201
+}
202
+
203
+Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, target_ulong addr,
204
+ Int128 cmpv, Int128 newv, uint32_t oi)
205
+{
206
+#if TCG_TARGET_REG_BITS == 32
207
+ uintptr_t ra = GETPC();
208
+ Int128 oldv;
209
+
210
+ oldv = cpu_ld16_le_mmu(env, addr, oi, ra);
211
+ if (int128_eq(oldv, cmpv)) {
212
+ cpu_st16_le_mmu(env, addr, newv, oi, ra);
213
+ } else {
214
+ /* Even with comparison failure, still need a write cycle. */
215
+ probe_write(env, addr, 16, get_mmuidx(oi), ra);
216
+ }
217
+ return oldv;
218
+#else
219
+ g_assert_not_reached();
220
+#endif
221
+}
222
+
223
#define ATOMIC_HELPER(OP, TYPE) \
224
TYPE HELPER(glue(atomic_,OP))(CPUArchState *env, target_ulong addr, \
225
TYPE val, uint32_t oi) \
226
--
227
2.34.1
228
229
diff view generated by jsdifflib
New patch
1
1
Normally this is automatically handled by the CF_PARALLEL checks
2
within tcg_gen_atomic_cmpxchg_i{32,64}, but x86 has a special
3
case of !PREFIX_LOCK where it always wants the non-atomic version.
4
5
Split these out so that x86 does not have to roll its own.
6
7
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
10
include/tcg/tcg-op.h | 4 ++
11
tcg/tcg-op.c | 154 +++++++++++++++++++++++++++----------------
12
2 files changed, 101 insertions(+), 57 deletions(-)
13
14
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/include/tcg/tcg-op.h
17
+++ b/include/tcg/tcg-op.h
18
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
19
void tcg_gen_atomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
20
TCGArg, MemOp);
21
22
+void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
23
+ TCGArg, MemOp);
24
+void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
25
+ TCGArg, MemOp);
26
void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
27
TCGArg, MemOp);
28
29
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/tcg/tcg-op.c
32
+++ b/tcg/tcg-op.c
33
@@ -XXX,XX +XXX,XX @@ static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
34
WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
35
};
36
37
+void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
38
+ TCGv_i32 newv, TCGArg idx, MemOp memop)
39
+{
40
+ TCGv_i32 t1 = tcg_temp_new_i32();
41
+ TCGv_i32 t2 = tcg_temp_new_i32();
42
+
43
+ tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
44
+
45
+ tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
46
+ tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
47
+ tcg_gen_qemu_st_i32(t2, addr, idx, memop);
48
+ tcg_temp_free_i32(t2);
49
+
50
+ if (memop & MO_SIGN) {
51
+ tcg_gen_ext_i32(retv, t1, memop);
52
+ } else {
53
+ tcg_gen_mov_i32(retv, t1);
54
+ }
55
+ tcg_temp_free_i32(t1);
56
+}
57
+
58
void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
59
TCGv_i32 newv, TCGArg idx, MemOp memop)
60
{
61
- memop = tcg_canonicalize_memop(memop, 0, 0);
62
+ gen_atomic_cx_i32 gen;
63
+ MemOpIdx oi;
64
65
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
66
- TCGv_i32 t1 = tcg_temp_new_i32();
67
- TCGv_i32 t2 = tcg_temp_new_i32();
68
-
69
- tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
70
-
71
- tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
72
- tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
73
- tcg_gen_qemu_st_i32(t2, addr, idx, memop);
74
- tcg_temp_free_i32(t2);
75
-
76
- if (memop & MO_SIGN) {
77
- tcg_gen_ext_i32(retv, t1, memop);
78
- } else {
79
- tcg_gen_mov_i32(retv, t1);
80
- }
81
- tcg_temp_free_i32(t1);
82
- } else {
83
- gen_atomic_cx_i32 gen;
84
- MemOpIdx oi;
85
-
86
- gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
87
- tcg_debug_assert(gen != NULL);
88
-
89
- oi = make_memop_idx(memop & ~MO_SIGN, idx);
90
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
91
-
92
- if (memop & MO_SIGN) {
93
- tcg_gen_ext_i32(retv, retv, memop);
94
- }
95
+ tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
96
+ return;
97
}
98
+
99
+ memop = tcg_canonicalize_memop(memop, 0, 0);
100
+ gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
101
+ tcg_debug_assert(gen != NULL);
102
+
103
+ oi = make_memop_idx(memop & ~MO_SIGN, idx);
104
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
105
+
106
+ if (memop & MO_SIGN) {
107
+ tcg_gen_ext_i32(retv, retv, memop);
108
+ }
109
+}
110
+
111
+void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
112
+ TCGv_i64 newv, TCGArg idx, MemOp memop)
113
+{
114
+ TCGv_i64 t1, t2;
115
+
116
+ if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
117
+ tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
118
+ TCGV_LOW(newv), idx, memop);
119
+ if (memop & MO_SIGN) {
120
+ tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
121
+ } else {
122
+ tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
123
+ }
124
+ return;
125
+ }
126
+
127
+ t1 = tcg_temp_new_i64();
128
+ t2 = tcg_temp_new_i64();
129
+
130
+ tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
131
+
132
+ tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
133
+ tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
134
+ tcg_gen_qemu_st_i64(t2, addr, idx, memop);
135
+ tcg_temp_free_i64(t2);
136
+
137
+ if (memop & MO_SIGN) {
138
+ tcg_gen_ext_i64(retv, t1, memop);
139
+ } else {
140
+ tcg_gen_mov_i64(retv, t1);
141
+ }
142
+ tcg_temp_free_i64(t1);
143
}
144
145
void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
146
TCGv_i64 newv, TCGArg idx, MemOp memop)
147
{
148
- memop = tcg_canonicalize_memop(memop, 1, 0);
149
-
150
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
151
- TCGv_i64 t1 = tcg_temp_new_i64();
152
- TCGv_i64 t2 = tcg_temp_new_i64();
153
+ tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
154
+ return;
155
+ }
156
157
- tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
158
-
159
- tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
160
- tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
161
- tcg_gen_qemu_st_i64(t2, addr, idx, memop);
162
- tcg_temp_free_i64(t2);
163
-
164
- if (memop & MO_SIGN) {
165
- tcg_gen_ext_i64(retv, t1, memop);
166
- } else {
167
- tcg_gen_mov_i64(retv, t1);
168
- }
169
- tcg_temp_free_i64(t1);
170
- } else if ((memop & MO_SIZE) == MO_64) {
171
-#ifdef CONFIG_ATOMIC64
172
+ if ((memop & MO_SIZE) == MO_64) {
173
gen_atomic_cx_i64 gen;
174
- MemOpIdx oi;
175
176
+ memop = tcg_canonicalize_memop(memop, 1, 0);
177
gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
178
- tcg_debug_assert(gen != NULL);
179
+ if (gen) {
180
+ MemOpIdx oi = make_memop_idx(memop, idx);
181
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
182
+ return;
183
+ }
184
185
- oi = make_memop_idx(memop, idx);
186
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
187
-#else
188
gen_helper_exit_atomic(cpu_env);
189
- /* Produce a result, so that we have a well-formed opcode stream
190
- with respect to uses of the result in the (dead) code following. */
191
+
192
+ /*
193
+ * Produce a result for a well-formed opcode stream. This satisfies
194
+ * liveness for set before used, which happens before this dead code
195
+ * is removed.
196
+ */
197
tcg_gen_movi_i64(retv, 0);
198
-#endif /* CONFIG_ATOMIC64 */
199
+ return;
200
+ }
201
+
202
+ if (TCG_TARGET_REG_BITS == 32) {
203
+ tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
204
+ TCGV_LOW(newv), idx, memop);
205
+ if (memop & MO_SIGN) {
206
+ tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
207
+ } else {
208
+ tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
209
+ }
210
} else {
211
TCGv_i32 c32 = tcg_temp_new_i32();
212
TCGv_i32 n32 = tcg_temp_new_i32();
213
--
214
2.34.1
215
216
diff view generated by jsdifflib
New patch
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Message-Id: <20221112042555.2622152-2-richard.henderson@linaro.org>
4
---
5
target/arm/helper-a64.h | 6 ---
6
target/arm/helper-a64.c | 104 -------------------------------------
7
target/arm/translate-a64.c | 60 ++++++++++++---------
8
3 files changed, 35 insertions(+), 135 deletions(-)
1
9
10
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/arm/helper-a64.h
13
+++ b/target/arm/helper-a64.h
14
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
15
DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env)
16
DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
17
DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
18
-DEF_HELPER_FLAGS_4(paired_cmpxchg64_le, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
19
-DEF_HELPER_FLAGS_4(paired_cmpxchg64_le_parallel, TCG_CALL_NO_WG,
20
- i64, env, i64, i64, i64)
21
-DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
22
-DEF_HELPER_FLAGS_4(paired_cmpxchg64_be_parallel, TCG_CALL_NO_WG,
23
- i64, env, i64, i64, i64)
24
DEF_HELPER_5(casp_le_parallel, void, env, i32, i64, i64, i64)
25
DEF_HELPER_5(casp_be_parallel, void, env, i32, i64, i64, i64)
26
DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
27
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/arm/helper-a64.c
30
+++ b/target/arm/helper-a64.c
31
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
32
return crc32c(acc, buf, bytes) ^ 0xffffffff;
33
}
34
35
-uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
36
- uint64_t new_lo, uint64_t new_hi)
37
-{
38
- Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
39
- Int128 newv = int128_make128(new_lo, new_hi);
40
- Int128 oldv;
41
- uintptr_t ra = GETPC();
42
- uint64_t o0, o1;
43
- bool success;
44
- int mem_idx = cpu_mmu_index(env, false);
45
- MemOpIdx oi0 = make_memop_idx(MO_LEUQ | MO_ALIGN_16, mem_idx);
46
- MemOpIdx oi1 = make_memop_idx(MO_LEUQ, mem_idx);
47
-
48
- o0 = cpu_ldq_le_mmu(env, addr + 0, oi0, ra);
49
- o1 = cpu_ldq_le_mmu(env, addr + 8, oi1, ra);
50
- oldv = int128_make128(o0, o1);
51
-
52
- success = int128_eq(oldv, cmpv);
53
- if (success) {
54
- cpu_stq_le_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
55
- cpu_stq_le_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
56
- }
57
-
58
- return !success;
59
-}
60
-
61
-uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
62
- uint64_t new_lo, uint64_t new_hi)
63
-{
64
- Int128 oldv, cmpv, newv;
65
- uintptr_t ra = GETPC();
66
- bool success;
67
- int mem_idx;
68
- MemOpIdx oi;
69
-
70
- assert(HAVE_CMPXCHG128);
71
-
72
- mem_idx = cpu_mmu_index(env, false);
73
- oi = make_memop_idx(MO_LE | MO_128 | MO_ALIGN, mem_idx);
74
-
75
- cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
76
- newv = int128_make128(new_lo, new_hi);
77
- oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
78
-
79
- success = int128_eq(oldv, cmpv);
80
- return !success;
81
-}
82
-
83
-uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
84
- uint64_t new_lo, uint64_t new_hi)
85
-{
86
- /*
87
- * High and low need to be switched here because this is not actually a
88
- * 128bit store but two doublewords stored consecutively
89
- */
90
- Int128 cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
91
- Int128 newv = int128_make128(new_hi, new_lo);
92
- Int128 oldv;
93
- uintptr_t ra = GETPC();
94
- uint64_t o0, o1;
95
- bool success;
96
- int mem_idx = cpu_mmu_index(env, false);
97
- MemOpIdx oi0 = make_memop_idx(MO_BEUQ | MO_ALIGN_16, mem_idx);
98
- MemOpIdx oi1 = make_memop_idx(MO_BEUQ, mem_idx);
99
-
100
- o1 = cpu_ldq_be_mmu(env, addr + 0, oi0, ra);
101
- o0 = cpu_ldq_be_mmu(env, addr + 8, oi1, ra);
102
- oldv = int128_make128(o0, o1);
103
-
104
- success = int128_eq(oldv, cmpv);
105
- if (success) {
106
- cpu_stq_be_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
107
- cpu_stq_be_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
108
- }
109
-
110
- return !success;
111
-}
112
-
113
-uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
114
- uint64_t new_lo, uint64_t new_hi)
115
-{
116
- Int128 oldv, cmpv, newv;
117
- uintptr_t ra = GETPC();
118
- bool success;
119
- int mem_idx;
120
- MemOpIdx oi;
121
-
122
- assert(HAVE_CMPXCHG128);
123
-
124
- mem_idx = cpu_mmu_index(env, false);
125
- oi = make_memop_idx(MO_BE | MO_128 | MO_ALIGN, mem_idx);
126
-
127
- /*
128
- * High and low need to be switched here because this is not actually a
129
- * 128bit store but two doublewords stored consecutively
130
- */
131
- cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
132
- newv = int128_make128(new_hi, new_lo);
133
- oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
134
-
135
- success = int128_eq(oldv, cmpv);
136
- return !success;
137
-}
138
-
139
/* Writes back the old data into Rs. */
140
void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
141
uint64_t new_lo, uint64_t new_hi)
142
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
143
index XXXXXXX..XXXXXXX 100644
144
--- a/target/arm/translate-a64.c
145
+++ b/target/arm/translate-a64.c
146
@@ -XXX,XX +XXX,XX @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
147
get_mem_index(s),
148
MO_64 | MO_ALIGN | s->be_data);
149
tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
150
- } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
151
- if (!HAVE_CMPXCHG128) {
152
- gen_helper_exit_atomic(cpu_env);
153
- /*
154
- * Produce a result so we have a well-formed opcode
155
- * stream when the following (dead) code uses 'tmp'.
156
- * TCG will remove the dead ops for us.
157
- */
158
- tcg_gen_movi_i64(tmp, 0);
159
- } else if (s->be_data == MO_LE) {
160
- gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
161
- cpu_exclusive_addr,
162
- cpu_reg(s, rt),
163
- cpu_reg(s, rt2));
164
- } else {
165
- gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
166
- cpu_exclusive_addr,
167
- cpu_reg(s, rt),
168
- cpu_reg(s, rt2));
169
- }
170
- } else if (s->be_data == MO_LE) {
171
- gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
172
- cpu_reg(s, rt), cpu_reg(s, rt2));
173
} else {
174
- gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
175
- cpu_reg(s, rt), cpu_reg(s, rt2));
176
+ TCGv_i128 t16 = tcg_temp_new_i128();
177
+ TCGv_i128 c16 = tcg_temp_new_i128();
178
+ TCGv_i64 a, b;
179
+
180
+ if (s->be_data == MO_LE) {
181
+ tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
182
+ tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
183
+ cpu_exclusive_high);
184
+ } else {
185
+ tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
186
+ tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
187
+ cpu_exclusive_val);
188
+ }
189
+
190
+ tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
191
+ get_mem_index(s),
192
+ MO_128 | MO_ALIGN | s->be_data);
193
+ tcg_temp_free_i128(c16);
194
+
195
+ a = tcg_temp_new_i64();
196
+ b = tcg_temp_new_i64();
197
+ if (s->be_data == MO_LE) {
198
+ tcg_gen_extr_i128_i64(a, b, t16);
199
+ } else {
200
+ tcg_gen_extr_i128_i64(b, a, t16);
201
+ }
202
+
203
+ tcg_gen_xor_i64(a, a, cpu_exclusive_val);
204
+ tcg_gen_xor_i64(b, b, cpu_exclusive_high);
205
+ tcg_gen_or_i64(tmp, a, b);
206
+ tcg_temp_free_i64(a);
207
+ tcg_temp_free_i64(b);
208
+ tcg_temp_free_i128(t16);
209
+
210
+ tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
211
}
212
} else {
213
tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
214
--
215
2.34.1
diff view generated by jsdifflib
New patch
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Message-Id: <20221112042555.2622152-3-richard.henderson@linaro.org>
4
---
5
target/arm/helper-a64.h | 2 --
6
target/arm/helper-a64.c | 43 ---------------------------
7
target/arm/translate-a64.c | 61 +++++++++++---------------------------
8
3 files changed, 18 insertions(+), 88 deletions(-)
1
9
10
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/arm/helper-a64.h
13
+++ b/target/arm/helper-a64.h
14
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
15
DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env)
16
DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
17
DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
18
-DEF_HELPER_5(casp_le_parallel, void, env, i32, i64, i64, i64)
19
-DEF_HELPER_5(casp_be_parallel, void, env, i32, i64, i64, i64)
20
DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
21
DEF_HELPER_FLAGS_3(advsimd_minh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
22
DEF_HELPER_FLAGS_3(advsimd_maxnumh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
23
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/target/arm/helper-a64.c
26
+++ b/target/arm/helper-a64.c
27
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
28
return crc32c(acc, buf, bytes) ^ 0xffffffff;
29
}
30
31
-/* Writes back the old data into Rs. */
32
-void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
33
- uint64_t new_lo, uint64_t new_hi)
34
-{
35
- Int128 oldv, cmpv, newv;
36
- uintptr_t ra = GETPC();
37
- int mem_idx;
38
- MemOpIdx oi;
39
-
40
- assert(HAVE_CMPXCHG128);
41
-
42
- mem_idx = cpu_mmu_index(env, false);
43
- oi = make_memop_idx(MO_LE | MO_128 | MO_ALIGN, mem_idx);
44
-
45
- cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
46
- newv = int128_make128(new_lo, new_hi);
47
- oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
48
-
49
- env->xregs[rs] = int128_getlo(oldv);
50
- env->xregs[rs + 1] = int128_gethi(oldv);
51
-}
52
-
53
-void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
54
- uint64_t new_hi, uint64_t new_lo)
55
-{
56
- Int128 oldv, cmpv, newv;
57
- uintptr_t ra = GETPC();
58
- int mem_idx;
59
- MemOpIdx oi;
60
-
61
- assert(HAVE_CMPXCHG128);
62
-
63
- mem_idx = cpu_mmu_index(env, false);
64
- oi = make_memop_idx(MO_LE | MO_128 | MO_ALIGN, mem_idx);
65
-
66
- cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
67
- newv = int128_make128(new_lo, new_hi);
68
- oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
69
-
70
- env->xregs[rs + 1] = int128_getlo(oldv);
71
- env->xregs[rs] = int128_gethi(oldv);
72
-}
73
-
74
/*
75
* AdvSIMD half-precision
76
*/
77
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
78
index XXXXXXX..XXXXXXX 100644
79
--- a/target/arm/translate-a64.c
80
+++ b/target/arm/translate-a64.c
81
@@ -XXX,XX +XXX,XX @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
82
tcg_gen_extr32_i64(s2, s1, cmp);
83
}
84
tcg_temp_free_i64(cmp);
85
- } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
86
- if (HAVE_CMPXCHG128) {
87
- TCGv_i32 tcg_rs = tcg_constant_i32(rs);
88
- if (s->be_data == MO_LE) {
89
- gen_helper_casp_le_parallel(cpu_env, tcg_rs,
90
- clean_addr, t1, t2);
91
- } else {
92
- gen_helper_casp_be_parallel(cpu_env, tcg_rs,
93
- clean_addr, t1, t2);
94
- }
95
- } else {
96
- gen_helper_exit_atomic(cpu_env);
97
- s->base.is_jmp = DISAS_NORETURN;
98
- }
99
} else {
100
- TCGv_i64 d1 = tcg_temp_new_i64();
101
- TCGv_i64 d2 = tcg_temp_new_i64();
102
- TCGv_i64 a2 = tcg_temp_new_i64();
103
- TCGv_i64 c1 = tcg_temp_new_i64();
104
- TCGv_i64 c2 = tcg_temp_new_i64();
105
- TCGv_i64 zero = tcg_constant_i64(0);
106
+ TCGv_i128 cmp = tcg_temp_new_i128();
107
+ TCGv_i128 val = tcg_temp_new_i128();
108
109
- /* Load the two words, in memory order. */
110
- tcg_gen_qemu_ld_i64(d1, clean_addr, memidx,
111
- MO_64 | MO_ALIGN_16 | s->be_data);
112
- tcg_gen_addi_i64(a2, clean_addr, 8);
113
- tcg_gen_qemu_ld_i64(d2, a2, memidx, MO_64 | s->be_data);
114
+ if (s->be_data == MO_LE) {
115
+ tcg_gen_concat_i64_i128(val, t1, t2);
116
+ tcg_gen_concat_i64_i128(cmp, s1, s2);
117
+ } else {
118
+ tcg_gen_concat_i64_i128(val, t2, t1);
119
+ tcg_gen_concat_i64_i128(cmp, s2, s1);
120
+ }
121
122
- /* Compare the two words, also in memory order. */
123
- tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
124
- tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
125
- tcg_gen_and_i64(c2, c2, c1);
126
+ tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx,
127
+ MO_128 | MO_ALIGN | s->be_data);
128
+ tcg_temp_free_i128(val);
129
130
- /* If compare equal, write back new data, else write back old data. */
131
- tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
132
- tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
133
- tcg_gen_qemu_st_i64(c1, clean_addr, memidx, MO_64 | s->be_data);
134
- tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
135
- tcg_temp_free_i64(a2);
136
- tcg_temp_free_i64(c1);
137
- tcg_temp_free_i64(c2);
138
-
139
- /* Write back the data from memory to Rs. */
140
- tcg_gen_mov_i64(s1, d1);
141
- tcg_gen_mov_i64(s2, d2);
142
- tcg_temp_free_i64(d1);
143
- tcg_temp_free_i64(d2);
144
+ if (s->be_data == MO_LE) {
145
+ tcg_gen_extr_i128_i64(s1, s2, cmp);
146
+ } else {
147
+ tcg_gen_extr_i128_i64(s2, s1, cmp);
148
+ }
149
+ tcg_temp_free_i128(cmp);
150
}
151
}
152
153
--
154
2.34.1
diff view generated by jsdifflib
New patch
1
1
Note that the previous direct reference to reserve_val,
2
3
- tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
4
- ? offsetof(CPUPPCState, reserve_val2)
5
- : offsetof(CPUPPCState, reserve_val)));
6
7
was incorrect because all references should have gone through
8
cpu_reserve_val. Create a cpu_reserve_val2 TCG global to fix this.
9
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
12
Message-Id: <20221112061122.2720163-2-richard.henderson@linaro.org>
13
---
14
target/ppc/helper.h | 2 -
15
target/ppc/mem_helper.c | 44 -----------------
16
target/ppc/translate.c | 102 ++++++++++++++++++----------------------
17
3 files changed, 47 insertions(+), 101 deletions(-)
18
19
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
20
index XXXXXXX..XXXXXXX 100644
21
--- a/target/ppc/helper.h
22
+++ b/target/ppc/helper.h
23
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG,
24
void, env, tl, i64, i64, i32)
25
DEF_HELPER_FLAGS_5(stq_be_parallel, TCG_CALL_NO_WG,
26
void, env, tl, i64, i64, i32)
27
-DEF_HELPER_5(stqcx_le_parallel, i32, env, tl, i64, i64, i32)
28
-DEF_HELPER_5(stqcx_be_parallel, i32, env, tl, i64, i64, i32)
29
#endif
30
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/target/ppc/mem_helper.c
33
+++ b/target/ppc/mem_helper.c
34
@@ -XXX,XX +XXX,XX @@ void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr,
35
val = int128_make128(lo, hi);
36
cpu_atomic_sto_be_mmu(env, addr, val, opidx, GETPC());
37
}
38
-
39
-uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr,
40
- uint64_t new_lo, uint64_t new_hi,
41
- uint32_t opidx)
42
-{
43
- bool success = false;
44
-
45
- /* We will have raised EXCP_ATOMIC from the translator. */
46
- assert(HAVE_CMPXCHG128);
47
-
48
- if (likely(addr == env->reserve_addr)) {
49
- Int128 oldv, cmpv, newv;
50
-
51
- cmpv = int128_make128(env->reserve_val2, env->reserve_val);
52
- newv = int128_make128(new_lo, new_hi);
53
- oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv,
54
- opidx, GETPC());
55
- success = int128_eq(oldv, cmpv);
56
- }
57
- env->reserve_addr = -1;
58
- return env->so + success * CRF_EQ_BIT;
59
-}
60
-
61
-uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr,
62
- uint64_t new_lo, uint64_t new_hi,
63
- uint32_t opidx)
64
-{
65
- bool success = false;
66
-
67
- /* We will have raised EXCP_ATOMIC from the translator. */
68
- assert(HAVE_CMPXCHG128);
69
-
70
- if (likely(addr == env->reserve_addr)) {
71
- Int128 oldv, cmpv, newv;
72
-
73
- cmpv = int128_make128(env->reserve_val2, env->reserve_val);
74
- newv = int128_make128(new_lo, new_hi);
75
- oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv,
76
- opidx, GETPC());
77
- success = int128_eq(oldv, cmpv);
78
- }
79
- env->reserve_addr = -1;
80
- return env->so + success * CRF_EQ_BIT;
81
-}
82
#endif
83
84
/*****************************************************************************/
85
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
86
index XXXXXXX..XXXXXXX 100644
87
--- a/target/ppc/translate.c
88
+++ b/target/ppc/translate.c
89
@@ -XXX,XX +XXX,XX @@ static TCGv cpu_cfar;
90
static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32;
91
static TCGv cpu_reserve;
92
static TCGv cpu_reserve_val;
93
+static TCGv cpu_reserve_val2;
94
static TCGv cpu_fpscr;
95
static TCGv_i32 cpu_access_type;
96
97
@@ -XXX,XX +XXX,XX @@ void ppc_translate_init(void)
98
offsetof(CPUPPCState, reserve_addr),
99
"reserve_addr");
100
cpu_reserve_val = tcg_global_mem_new(cpu_env,
101
- offsetof(CPUPPCState, reserve_val),
102
- "reserve_val");
103
+ offsetof(CPUPPCState, reserve_val),
104
+ "reserve_val");
105
+ cpu_reserve_val2 = tcg_global_mem_new(cpu_env,
106
+ offsetof(CPUPPCState, reserve_val2),
107
+ "reserve_val2");
108
109
cpu_fpscr = tcg_global_mem_new(cpu_env,
110
offsetof(CPUPPCState, fpscr), "fpscr");
111
@@ -XXX,XX +XXX,XX @@ static void gen_lqarx(DisasContext *ctx)
112
/* stqcx. */
113
static void gen_stqcx_(DisasContext *ctx)
114
{
115
+ TCGLabel *lab_fail, *lab_over;
116
int rs = rS(ctx->opcode);
117
- TCGv EA, hi, lo;
118
+ TCGv EA, t0, t1;
119
+ TCGv_i128 cmp, val;
120
121
if (unlikely(rs & 1)) {
122
gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
123
return;
124
}
125
126
+ lab_fail = gen_new_label();
127
+ lab_over = gen_new_label();
128
+
129
gen_set_access_type(ctx, ACCESS_RES);
130
EA = tcg_temp_new();
131
gen_addr_reg_index(ctx, EA);
132
133
+ tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, lab_fail);
134
+ tcg_temp_free(EA);
135
+
136
+ cmp = tcg_temp_new_i128();
137
+ val = tcg_temp_new_i128();
138
+
139
+ tcg_gen_concat_i64_i128(cmp, cpu_reserve_val2, cpu_reserve_val);
140
+
141
/* Note that the low part is always in RS+1, even in LE mode. */
142
- lo = cpu_gpr[rs + 1];
143
- hi = cpu_gpr[rs];
144
+ tcg_gen_concat_i64_i128(val, cpu_gpr[rs + 1], cpu_gpr[rs]);
145
146
- if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
147
- if (HAVE_CMPXCHG128) {
148
- TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_128) | MO_ALIGN);
149
- if (ctx->le_mode) {
150
- gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env,
151
- EA, lo, hi, oi);
152
- } else {
153
- gen_helper_stqcx_be_parallel(cpu_crf[0], cpu_env,
154
- EA, lo, hi, oi);
155
- }
156
- tcg_temp_free_i32(oi);
157
- } else {
158
- /* Restart with exclusive lock. */
159
- gen_helper_exit_atomic(cpu_env);
160
- ctx->base.is_jmp = DISAS_NORETURN;
161
- }
162
- tcg_temp_free(EA);
163
- } else {
164
- TCGLabel *lab_fail = gen_new_label();
165
- TCGLabel *lab_over = gen_new_label();
166
- TCGv_i64 t0 = tcg_temp_new_i64();
167
- TCGv_i64 t1 = tcg_temp_new_i64();
168
+ tcg_gen_atomic_cmpxchg_i128(val, cpu_reserve, cmp, val, ctx->mem_idx,
169
+ DEF_MEMOP(MO_128 | MO_ALIGN));
170
+ tcg_temp_free_i128(cmp);
171
172
- tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, lab_fail);
173
- tcg_temp_free(EA);
174
+ t0 = tcg_temp_new();
175
+ t1 = tcg_temp_new();
176
+ tcg_gen_extr_i128_i64(t1, t0, val);
177
+ tcg_temp_free_i128(val);
178
179
- gen_qemu_ld64_i64(ctx, t0, cpu_reserve);
180
- tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
181
- ? offsetof(CPUPPCState, reserve_val2)
182
- : offsetof(CPUPPCState, reserve_val)));
183
- tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, lab_fail);
184
+ tcg_gen_xor_tl(t1, t1, cpu_reserve_val2);
185
+ tcg_gen_xor_tl(t0, t0, cpu_reserve_val);
186
+ tcg_gen_or_tl(t0, t0, t1);
187
+ tcg_temp_free(t1);
188
189
- tcg_gen_addi_i64(t0, cpu_reserve, 8);
190
- gen_qemu_ld64_i64(ctx, t0, t0);
191
- tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode
192
- ? offsetof(CPUPPCState, reserve_val)
193
- : offsetof(CPUPPCState, reserve_val2)));
194
- tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, lab_fail);
195
+ tcg_gen_setcondi_tl(TCG_COND_EQ, t0, t0, 0);
196
+ tcg_gen_shli_tl(t0, t0, CRF_EQ_BIT);
197
+ tcg_gen_or_tl(t0, t0, cpu_so);
198
+ tcg_gen_trunc_tl_i32(cpu_crf[0], t0);
199
+ tcg_temp_free(t0);
200
201
- /* Success */
202
- gen_qemu_st64_i64(ctx, ctx->le_mode ? lo : hi, cpu_reserve);
203
- tcg_gen_addi_i64(t0, cpu_reserve, 8);
204
- gen_qemu_st64_i64(ctx, ctx->le_mode ? hi : lo, t0);
205
+ tcg_gen_br(lab_over);
206
+ gen_set_label(lab_fail);
207
208
- tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
209
- tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], CRF_EQ);
210
- tcg_gen_br(lab_over);
211
+ /*
212
+ * Address mismatch implies failure. But we still need to provide
213
+ * the memory barrier semantics of the instruction.
214
+ */
215
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
216
+ tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
217
218
- gen_set_label(lab_fail);
219
- tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
220
-
221
- gen_set_label(lab_over);
222
- tcg_gen_movi_tl(cpu_reserve, -1);
223
- tcg_temp_free_i64(t0);
224
- tcg_temp_free_i64(t1);
225
- }
226
+ gen_set_label(lab_over);
227
+ tcg_gen_movi_tl(cpu_reserve, -1);
228
}
229
#endif /* defined(TARGET_PPC64) */
230
231
--
232
2.34.1
diff view generated by jsdifflib
New patch
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
1
2
3
Add a basic test to prevent regressions.
4
5
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
6
Message-Id: <20221101111300.2539919-1-iii@linux.ibm.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tests/tcg/s390x/div.c | 40 +++++++++++++++++++++++++++++++++
10
tests/tcg/s390x/Makefile.target | 1 +
11
2 files changed, 41 insertions(+)
12
create mode 100644 tests/tcg/s390x/div.c
13
14
diff --git a/tests/tcg/s390x/div.c b/tests/tcg/s390x/div.c
15
new file mode 100644
16
index XXXXXXX..XXXXXXX
17
--- /dev/null
18
+++ b/tests/tcg/s390x/div.c
19
@@ -XXX,XX +XXX,XX @@
20
+#include <assert.h>
21
+#include <stdint.h>
22
+
23
+static void test_dr(void)
24
+{
25
+ register int32_t r0 asm("r0") = -1;
26
+ register int32_t r1 asm("r1") = -4241;
27
+ int32_t b = 101, q, r;
28
+
29
+ asm("dr %[r0],%[b]"
30
+ : [r0] "+r" (r0), [r1] "+r" (r1)
31
+ : [b] "r" (b)
32
+ : "cc");
33
+ q = r1;
34
+ r = r0;
35
+ assert(q == -41);
36
+ assert(r == -100);
37
+}
38
+
39
+static void test_dlr(void)
40
+{
41
+ register uint32_t r0 asm("r0") = 0;
42
+ register uint32_t r1 asm("r1") = 4243;
43
+ uint32_t b = 101, q, r;
44
+
45
+ asm("dlr %[r0],%[b]"
46
+ : [r0] "+r" (r0), [r1] "+r" (r1)
47
+ : [b] "r" (b)
48
+ : "cc");
49
+ q = r1;
50
+ r = r0;
51
+ assert(q == 42);
52
+ assert(r == 1);
53
+}
54
+
55
+int main(void)
56
+{
57
+ test_dr();
58
+ test_dlr();
59
+}
60
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
61
index XXXXXXX..XXXXXXX 100644
62
--- a/tests/tcg/s390x/Makefile.target
63
+++ b/tests/tcg/s390x/Makefile.target
64
@@ -XXX,XX +XXX,XX @@ TESTS+=trap
65
TESTS+=signals-s390x
66
TESTS+=branch-relative-long
67
TESTS+=noexec
68
+TESTS+=div
69
70
Z13_TESTS=vistr
71
$(Z13_TESTS): CFLAGS+=-march=z13 -O2
72
--
73
2.34.1
diff view generated by jsdifflib
New patch
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
1
2
3
Add a basic test to prevent regressions.
4
5
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
6
Message-Id: <20221025213008.2209006-2-iii@linux.ibm.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tests/tcg/s390x/clst.c | 82 +++++++++++++++++++++++++++++++++
10
tests/tcg/s390x/Makefile.target | 1 +
11
2 files changed, 83 insertions(+)
12
create mode 100644 tests/tcg/s390x/clst.c
13
14
diff --git a/tests/tcg/s390x/clst.c b/tests/tcg/s390x/clst.c
15
new file mode 100644
16
index XXXXXXX..XXXXXXX
17
--- /dev/null
18
+++ b/tests/tcg/s390x/clst.c
19
@@ -XXX,XX +XXX,XX @@
20
+#define _GNU_SOURCE
21
+#include <stdio.h>
22
+#include <stdlib.h>
23
+
24
+static int clst(char sep, const char **s1, const char **s2)
25
+{
26
+ const char *r1 = *s1;
27
+ const char *r2 = *s2;
28
+ int cc;
29
+
30
+ do {
31
+ register int r0 asm("r0") = sep;
32
+
33
+ asm("clst %[r1],%[r2]\n"
34
+ "ipm %[cc]\n"
35
+ "srl %[cc],28"
36
+ : [r1] "+r" (r1), [r2] "+r" (r2), "+r" (r0), [cc] "=r" (cc)
37
+ :
38
+ : "cc");
39
+ *s1 = r1;
40
+ *s2 = r2;
41
+ } while (cc == 3);
42
+
43
+ return cc;
44
+}
45
+
46
+static const struct test {
47
+ const char *name;
48
+ char sep;
49
+ const char *s1;
50
+ const char *s2;
51
+ int exp_cc;
52
+ int exp_off;
53
+} tests[] = {
54
+ {
55
+ .name = "cc0",
56
+ .sep = 0,
57
+ .s1 = "aa",
58
+ .s2 = "aa",
59
+ .exp_cc = 0,
60
+ .exp_off = 0,
61
+ },
62
+ {
63
+ .name = "cc1",
64
+ .sep = 1,
65
+ .s1 = "a\x01",
66
+ .s2 = "aa\x01",
67
+ .exp_cc = 1,
68
+ .exp_off = 1,
69
+ },
70
+ {
71
+ .name = "cc2",
72
+ .sep = 2,
73
+ .s1 = "abc\x02",
74
+ .s2 = "abb\x02",
75
+ .exp_cc = 2,
76
+ .exp_off = 2,
77
+ },
78
+};
79
+
80
+int main(void)
81
+{
82
+ const struct test *t;
83
+ const char *s1, *s2;
84
+ size_t i;
85
+ int cc;
86
+
87
+ for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
88
+ t = &tests[i];
89
+ s1 = t->s1;
90
+ s2 = t->s2;
91
+ cc = clst(t->sep, &s1, &s2);
92
+ if (cc != t->exp_cc ||
93
+ s1 != t->s1 + t->exp_off ||
94
+ s2 != t->s2 + t->exp_off) {
95
+ fprintf(stderr, "%s\n", t->name);
96
+ return EXIT_FAILURE;
97
+ }
98
+ }
99
+
100
+ return EXIT_SUCCESS;
101
+}
102
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
103
index XXXXXXX..XXXXXXX 100644
104
--- a/tests/tcg/s390x/Makefile.target
105
+++ b/tests/tcg/s390x/Makefile.target
106
@@ -XXX,XX +XXX,XX @@ TESTS+=signals-s390x
107
TESTS+=branch-relative-long
108
TESTS+=noexec
109
TESTS+=div
110
+TESTS+=clst
111
112
Z13_TESTS=vistr
113
$(Z13_TESTS): CFLAGS+=-march=z13 -O2
114
--
115
2.34.1
diff view generated by jsdifflib
New patch
1
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
2
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
tests/tcg/s390x/long-double.c | 24 ++++++++++++++++++++++++
6
tests/tcg/s390x/Makefile.target | 1 +
7
2 files changed, 25 insertions(+)
8
create mode 100644 tests/tcg/s390x/long-double.c
1
9
10
diff --git a/tests/tcg/s390x/long-double.c b/tests/tcg/s390x/long-double.c
11
new file mode 100644
12
index XXXXXXX..XXXXXXX
13
--- /dev/null
14
+++ b/tests/tcg/s390x/long-double.c
15
@@ -XXX,XX +XXX,XX @@
16
+/*
17
+ * Perform some basic arithmetic with long double, as a sanity check.
18
+ * With small integral numbers, we can cross-check with integers.
19
+ */
20
+
21
+#include <assert.h>
22
+
23
+int main()
24
+{
25
+ int i, j;
26
+
27
+ for (i = 1; i < 5; i++) {
28
+ for (j = 1; j < 5; j++) {
29
+ long double la = (long double)i + j;
30
+ long double lm = (long double)i * j;
31
+ long double ls = (long double)i - j;
32
+
33
+ assert(la == i + j);
34
+ assert(lm == i * j);
35
+ assert(ls == i - j);
36
+ }
37
+ }
38
+ return 0;
39
+}
40
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
41
index XXXXXXX..XXXXXXX 100644
42
--- a/tests/tcg/s390x/Makefile.target
43
+++ b/tests/tcg/s390x/Makefile.target
44
@@ -XXX,XX +XXX,XX @@ TESTS+=branch-relative-long
45
TESTS+=noexec
46
TESTS+=div
47
TESTS+=clst
48
+TESTS+=long-double
49
50
Z13_TESTS=vistr
51
$(Z13_TESTS): CFLAGS+=-march=z13 -O2
52
--
53
2.34.1
54
55
diff view generated by jsdifflib
New patch
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
1
2
3
Add a simple test to prevent regressions.
4
5
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
6
Message-Id: <20230201133257.3223115-1-iii@linux.ibm.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tests/tcg/s390x/cdsg.c | 93 +++++++++++++++++++++++++++++++++
10
tests/tcg/s390x/Makefile.target | 4 ++
11
2 files changed, 97 insertions(+)
12
create mode 100644 tests/tcg/s390x/cdsg.c
13
14
diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c
15
new file mode 100644
16
index XXXXXXX..XXXXXXX
17
--- /dev/null
18
+++ b/tests/tcg/s390x/cdsg.c
19
@@ -XXX,XX +XXX,XX @@
20
+/*
21
+ * Test CDSG instruction.
22
+ *
23
+ * Increment the first half of aligned_quadword by 1, and the second half by 2
24
+ * from 2 threads. Verify that the result is consistent.
25
+ *
26
+ * SPDX-License-Identifier: GPL-2.0-or-later
27
+ */
28
+#include <assert.h>
29
+#include <pthread.h>
30
+#include <stdbool.h>
31
+#include <stdlib.h>
32
+
33
+static volatile bool start;
34
+typedef unsigned long aligned_quadword[2] __attribute__((__aligned__(16)));
35
+static aligned_quadword val;
36
+static const int n_iterations = 1000000;
37
+
38
+static inline int cdsg(unsigned long *orig0, unsigned long *orig1,
39
+ unsigned long new0, unsigned long new1,
40
+ aligned_quadword *mem)
41
+{
42
+ register unsigned long r0 asm("r0");
43
+ register unsigned long r1 asm("r1");
44
+ register unsigned long r2 asm("r2");
45
+ register unsigned long r3 asm("r3");
46
+ int cc;
47
+
48
+ r0 = *orig0;
49
+ r1 = *orig1;
50
+ r2 = new0;
51
+ r3 = new1;
52
+ asm("cdsg %[r0],%[r2],%[db2]\n"
53
+ "ipm %[cc]"
54
+ : [r0] "+r" (r0)
55
+ , [r1] "+r" (r1)
56
+ , [db2] "+m" (*mem)
57
+ , [cc] "=r" (cc)
58
+ : [r2] "r" (r2)
59
+ , [r3] "r" (r3)
60
+ : "cc");
61
+ *orig0 = r0;
62
+ *orig1 = r1;
63
+
64
+ return (cc >> 28) & 3;
65
+}
66
+
67
+void *cdsg_loop(void *arg)
68
+{
69
+ unsigned long orig0, orig1, new0, new1;
70
+ int cc;
71
+ int i;
72
+
73
+ while (!start) {
74
+ }
75
+
76
+ orig0 = val[0];
77
+ orig1 = val[1];
78
+ for (i = 0; i < n_iterations;) {
79
+ new0 = orig0 + 1;
80
+ new1 = orig1 + 2;
81
+
82
+ cc = cdsg(&orig0, &orig1, new0, new1, &val);
83
+
84
+ if (cc == 0) {
85
+ orig0 = new0;
86
+ orig1 = new1;
87
+ i++;
88
+ } else {
89
+ assert(cc == 1);
90
+ }
91
+ }
92
+
93
+ return NULL;
94
+}
95
+
96
+int main(void)
97
+{
98
+ pthread_t thread;
99
+ int ret;
100
+
101
+ ret = pthread_create(&thread, NULL, cdsg_loop, NULL);
102
+ assert(ret == 0);
103
+ start = true;
104
+ cdsg_loop(NULL);
105
+ ret = pthread_join(thread, NULL);
106
+ assert(ret == 0);
107
+
108
+ assert(val[0] == n_iterations * 2);
109
+ assert(val[1] == n_iterations * 4);
110
+
111
+ return EXIT_SUCCESS;
112
+}
113
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
114
index XXXXXXX..XXXXXXX 100644
115
--- a/tests/tcg/s390x/Makefile.target
116
+++ b/tests/tcg/s390x/Makefile.target
117
@@ -XXX,XX +XXX,XX @@ TESTS+=noexec
118
TESTS+=div
119
TESTS+=clst
120
TESTS+=long-double
121
+TESTS+=cdsg
122
+
123
+cdsg: CFLAGS+=-pthread
124
+cdsg: LDFLAGS+=-pthread
125
126
Z13_TESTS=vistr
127
$(Z13_TESTS): CFLAGS+=-march=z13 -O2
128
--
129
2.34.1
diff view generated by jsdifflib
New patch
1
Pack the quotient and remainder into a single uint64_t.
1
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: David Hildenbrand <david@redhat.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
v2: Fix operand ordering; use tcg_gen_extr32_i64.
8
---
9
target/s390x/helper.h | 2 +-
10
target/s390x/tcg/int_helper.c | 26 +++++++++++++-------------
11
target/s390x/tcg/translate.c | 8 ++++----
12
3 files changed, 18 insertions(+), 18 deletions(-)
13
14
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/target/s390x/helper.h
17
+++ b/target/s390x/helper.h
18
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(clc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
19
DEF_HELPER_3(mvcl, i32, env, i32, i32)
20
DEF_HELPER_3(clcl, i32, env, i32, i32)
21
DEF_HELPER_FLAGS_4(clm, TCG_CALL_NO_WG, i32, env, i32, i32, i64)
22
-DEF_HELPER_FLAGS_3(divs32, TCG_CALL_NO_WG, s64, env, s64, s64)
23
+DEF_HELPER_FLAGS_3(divs32, TCG_CALL_NO_WG, i64, env, s64, s64)
24
DEF_HELPER_FLAGS_3(divu32, TCG_CALL_NO_WG, i64, env, i64, i64)
25
DEF_HELPER_FLAGS_3(divs64, TCG_CALL_NO_WG, s64, env, s64, s64)
26
DEF_HELPER_FLAGS_4(divu64, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
27
diff --git a/target/s390x/tcg/int_helper.c b/target/s390x/tcg/int_helper.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/s390x/tcg/int_helper.c
30
+++ b/target/s390x/tcg/int_helper.c
31
@@ -XXX,XX +XXX,XX @@
32
#endif
33
34
/* 64/32 -> 32 signed division */
35
-int64_t HELPER(divs32)(CPUS390XState *env, int64_t a, int64_t b64)
36
+uint64_t HELPER(divs32)(CPUS390XState *env, int64_t a, int64_t b64)
37
{
38
- int32_t ret, b = b64;
39
- int64_t q;
40
+ int32_t b = b64;
41
+ int64_t q, r;
42
43
if (b == 0) {
44
tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
45
}
46
47
- ret = q = a / b;
48
- env->retxl = a % b;
49
+ q = a / b;
50
+ r = a % b;
51
52
/* Catch non-representable quotient. */
53
- if (ret != q) {
54
+ if (q != (int32_t)q) {
55
tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
56
}
57
58
- return ret;
59
+ return deposit64(q, 32, 32, r);
60
}
61
62
/* 64/32 -> 32 unsigned division */
63
uint64_t HELPER(divu32)(CPUS390XState *env, uint64_t a, uint64_t b64)
64
{
65
- uint32_t ret, b = b64;
66
- uint64_t q;
67
+ uint32_t b = b64;
68
+ uint64_t q, r;
69
70
if (b == 0) {
71
tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
72
}
73
74
- ret = q = a / b;
75
- env->retxl = a % b;
76
+ q = a / b;
77
+ r = a % b;
78
79
/* Catch non-representable quotient. */
80
- if (ret != q) {
81
+ if (q != (uint32_t)q) {
82
tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
83
}
84
85
- return ret;
86
+ return deposit64(q, 32, 32, r);
87
}
88
89
/* 64/64 -> 64 signed division */
90
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
91
index XXXXXXX..XXXXXXX 100644
92
--- a/target/s390x/tcg/translate.c
93
+++ b/target/s390x/tcg/translate.c
94
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_diag(DisasContext *s, DisasOps *o)
95
96
static DisasJumpType op_divs32(DisasContext *s, DisasOps *o)
97
{
98
- gen_helper_divs32(o->out2, cpu_env, o->in1, o->in2);
99
- return_low128(o->out);
100
+ gen_helper_divs32(o->out, cpu_env, o->in1, o->in2);
101
+ tcg_gen_extr32_i64(o->out2, o->out, o->out);
102
return DISAS_NEXT;
103
}
104
105
static DisasJumpType op_divu32(DisasContext *s, DisasOps *o)
106
{
107
- gen_helper_divu32(o->out2, cpu_env, o->in1, o->in2);
108
- return_low128(o->out);
109
+ gen_helper_divu32(o->out, cpu_env, o->in1, o->in2);
110
+ tcg_gen_extr32_i64(o->out2, o->out, o->out);
111
return DISAS_NEXT;
112
}
113
114
--
115
2.34.1
116
117
diff view generated by jsdifflib
New patch
1
Pack the quotient and remainder into a single Int128.
2
Use the divu128 primitive to remove the cpu_abort on
3
32-bit hosts.
1
4
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
v2: Extended div test case to cover these insns.
10
---
11
target/s390x/helper.h | 4 ++--
12
target/s390x/tcg/int_helper.c | 38 +++++++++--------------------------
13
target/s390x/tcg/translate.c | 14 +++++++++----
14
tests/tcg/s390x/div.c | 35 ++++++++++++++++++++++++++++++++
15
4 files changed, 56 insertions(+), 35 deletions(-)
16
17
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/target/s390x/helper.h
20
+++ b/target/s390x/helper.h
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(clcl, i32, env, i32, i32)
22
DEF_HELPER_FLAGS_4(clm, TCG_CALL_NO_WG, i32, env, i32, i32, i64)
23
DEF_HELPER_FLAGS_3(divs32, TCG_CALL_NO_WG, i64, env, s64, s64)
24
DEF_HELPER_FLAGS_3(divu32, TCG_CALL_NO_WG, i64, env, i64, i64)
25
-DEF_HELPER_FLAGS_3(divs64, TCG_CALL_NO_WG, s64, env, s64, s64)
26
-DEF_HELPER_FLAGS_4(divu64, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
27
+DEF_HELPER_FLAGS_3(divs64, TCG_CALL_NO_WG, i128, env, s64, s64)
28
+DEF_HELPER_FLAGS_4(divu64, TCG_CALL_NO_WG, i128, env, i64, i64, i64)
29
DEF_HELPER_3(srst, void, env, i32, i32)
30
DEF_HELPER_3(srstu, void, env, i32, i32)
31
DEF_HELPER_4(clst, i64, env, i64, i64, i64)
32
diff --git a/target/s390x/tcg/int_helper.c b/target/s390x/tcg/int_helper.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/target/s390x/tcg/int_helper.c
35
+++ b/target/s390x/tcg/int_helper.c
36
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(divu32)(CPUS390XState *env, uint64_t a, uint64_t b64)
37
}
38
39
/* 64/64 -> 64 signed division */
40
-int64_t HELPER(divs64)(CPUS390XState *env, int64_t a, int64_t b)
41
+Int128 HELPER(divs64)(CPUS390XState *env, int64_t a, int64_t b)
42
{
43
/* Catch divide by zero, and non-representable quotient (MIN / -1). */
44
if (b == 0 || (b == -1 && a == (1ll << 63))) {
45
tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
46
}
47
- env->retxl = a % b;
48
- return a / b;
49
+ return int128_make128(a / b, a % b);
50
}
51
52
/* 128 -> 64/64 unsigned division */
53
-uint64_t HELPER(divu64)(CPUS390XState *env, uint64_t ah, uint64_t al,
54
- uint64_t b)
55
+Int128 HELPER(divu64)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t b)
56
{
57
- uint64_t ret;
58
- /* Signal divide by zero. */
59
- if (b == 0) {
60
- tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
61
- }
62
- if (ah == 0) {
63
- /* 64 -> 64/64 case */
64
- env->retxl = al % b;
65
- ret = al / b;
66
- } else {
67
- /* ??? Move i386 idivq helper to host-utils. */
68
-#ifdef CONFIG_INT128
69
- __uint128_t a = ((__uint128_t)ah << 64) | al;
70
- __uint128_t q = a / b;
71
- env->retxl = a % b;
72
- ret = q;
73
- if (ret != q) {
74
- tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
75
+ if (b != 0) {
76
+ uint64_t r = divu128(&al, &ah, b);
77
+ if (ah == 0) {
78
+ return int128_make128(al, r);
79
}
80
-#else
81
- /* 32-bit hosts would need special wrapper functionality - just abort if
82
- we encounter such a case; it's very unlikely anyways. */
83
- cpu_abort(env_cpu(env), "128 -> 64/64 division not implemented\n");
84
-#endif
85
}
86
- return ret;
87
+ /* divide by zero or overflow */
88
+ tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC());
89
}
90
91
uint64_t HELPER(cvd)(int32_t reg)
92
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
93
index XXXXXXX..XXXXXXX 100644
94
--- a/target/s390x/tcg/translate.c
95
+++ b/target/s390x/tcg/translate.c
96
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_divu32(DisasContext *s, DisasOps *o)
97
98
static DisasJumpType op_divs64(DisasContext *s, DisasOps *o)
99
{
100
- gen_helper_divs64(o->out2, cpu_env, o->in1, o->in2);
101
- return_low128(o->out);
102
+ TCGv_i128 t = tcg_temp_new_i128();
103
+
104
+ gen_helper_divs64(t, cpu_env, o->in1, o->in2);
105
+ tcg_gen_extr_i128_i64(o->out2, o->out, t);
106
+ tcg_temp_free_i128(t);
107
return DISAS_NEXT;
108
}
109
110
static DisasJumpType op_divu64(DisasContext *s, DisasOps *o)
111
{
112
- gen_helper_divu64(o->out2, cpu_env, o->out, o->out2, o->in2);
113
- return_low128(o->out);
114
+ TCGv_i128 t = tcg_temp_new_i128();
115
+
116
+ gen_helper_divu64(t, cpu_env, o->out, o->out2, o->in2);
117
+ tcg_gen_extr_i128_i64(o->out2, o->out, t);
118
+ tcg_temp_free_i128(t);
119
return DISAS_NEXT;
120
}
121
122
diff --git a/tests/tcg/s390x/div.c b/tests/tcg/s390x/div.c
123
index XXXXXXX..XXXXXXX 100644
124
--- a/tests/tcg/s390x/div.c
125
+++ b/tests/tcg/s390x/div.c
126
@@ -XXX,XX +XXX,XX @@ static void test_dlr(void)
127
assert(r == 1);
128
}
129
130
+static void test_dsgr(void)
131
+{
132
+ register int64_t r0 asm("r0") = -1;
133
+ register int64_t r1 asm("r1") = -4241;
134
+ int64_t b = 101, q, r;
135
+
136
+ asm("dsgr %[r0],%[b]"
137
+ : [r0] "+r" (r0), [r1] "+r" (r1)
138
+ : [b] "r" (b)
139
+ : "cc");
140
+ q = r1;
141
+ r = r0;
142
+ assert(q == -41);
143
+ assert(r == -100);
144
+}
145
+
146
+static void test_dlgr(void)
147
+{
148
+ register uint64_t r0 asm("r0") = 0;
149
+ register uint64_t r1 asm("r1") = 4243;
150
+ uint64_t b = 101, q, r;
151
+
152
+ asm("dlgr %[r0],%[b]"
153
+ : [r0] "+r" (r0), [r1] "+r" (r1)
154
+ : [b] "r" (b)
155
+ : "cc");
156
+ q = r1;
157
+ r = r0;
158
+ assert(q == 42);
159
+ assert(r == 1);
160
+}
161
+
162
int main(void)
163
{
164
test_dr();
165
test_dlr();
166
+ test_dsgr();
167
+ test_dlgr();
168
+ return 0;
169
}
170
--
171
2.34.1
172
173
diff view generated by jsdifflib
New patch
1
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
target/s390x/helper.h | 2 +-
6
target/s390x/tcg/mem_helper.c | 11 ++++-------
7
target/s390x/tcg/translate.c | 8 ++++++--
8
3 files changed, 11 insertions(+), 10 deletions(-)
1
9
10
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/s390x/helper.h
13
+++ b/target/s390x/helper.h
14
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(divs64, TCG_CALL_NO_WG, i128, env, s64, s64)
15
DEF_HELPER_FLAGS_4(divu64, TCG_CALL_NO_WG, i128, env, i64, i64, i64)
16
DEF_HELPER_3(srst, void, env, i32, i32)
17
DEF_HELPER_3(srstu, void, env, i32, i32)
18
-DEF_HELPER_4(clst, i64, env, i64, i64, i64)
19
+DEF_HELPER_4(clst, i128, env, i64, i64, i64)
20
DEF_HELPER_FLAGS_4(mvn, TCG_CALL_NO_WG, void, env, i32, i64, i64)
21
DEF_HELPER_FLAGS_4(mvo, TCG_CALL_NO_WG, void, env, i32, i64, i64)
22
DEF_HELPER_FLAGS_4(mvpg, TCG_CALL_NO_WG, i32, env, i64, i32, i32)
23
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/target/s390x/tcg/mem_helper.c
26
+++ b/target/s390x/tcg/mem_helper.c
27
@@ -XXX,XX +XXX,XX @@ void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
28
}
29
30
/* unsigned string compare (c is string terminator) */
31
-uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
32
+Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
33
{
34
uintptr_t ra = GETPC();
35
uint32_t len;
36
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
37
if (v1 == c) {
38
/* Equal. CC=0, and don't advance the registers. */
39
env->cc_op = 0;
40
- env->retxl = s2;
41
- return s1;
42
+ return int128_make128(s2, s1);
43
}
44
} else {
45
/* Unequal. CC={1,2}, and advance the registers. Note that
46
the terminator need not be zero, but the string that contains
47
the terminator is by definition "low". */
48
env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
49
- env->retxl = s2 + len;
50
- return s1 + len;
51
+ return int128_make128(s2 + len, s1 + len);
52
}
53
}
54
55
/* CPU-determined bytes equal; advance the registers. */
56
env->cc_op = 3;
57
- env->retxl = s2 + len;
58
- return s1 + len;
59
+ return int128_make128(s2 + len, s1 + len);
60
}
61
62
/* move page */
63
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
64
index XXXXXXX..XXXXXXX 100644
65
--- a/target/s390x/tcg/translate.c
66
+++ b/target/s390x/tcg/translate.c
67
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_clm(DisasContext *s, DisasOps *o)
68
69
static DisasJumpType op_clst(DisasContext *s, DisasOps *o)
70
{
71
- gen_helper_clst(o->in1, cpu_env, regs[0], o->in1, o->in2);
72
+ TCGv_i128 pair = tcg_temp_new_i128();
73
+
74
+ gen_helper_clst(pair, cpu_env, regs[0], o->in1, o->in2);
75
+ tcg_gen_extr_i128_i64(o->in2, o->in1, pair);
76
+ tcg_temp_free_i128(pair);
77
+
78
set_cc_static(s);
79
- return_low128(o->in2);
80
return DISAS_NEXT;
81
}
82
83
--
84
2.34.1
85
86
diff view generated by jsdifflib
New patch
1
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
2
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
target/s390x/helper.h | 2 +-
6
target/s390x/tcg/mem_helper.c | 7 +++----
7
target/s390x/tcg/translate.c | 6 ++++--
8
3 files changed, 8 insertions(+), 7 deletions(-)
1
9
10
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/s390x/helper.h
13
+++ b/target/s390x/helper.h
14
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(tre, i64, env, i64, i64, i64)
15
DEF_HELPER_4(trt, i32, env, i32, i64, i64)
16
DEF_HELPER_4(trtr, i32, env, i32, i64, i64)
17
DEF_HELPER_5(trXX, i32, env, i32, i32, i32, i32)
18
-DEF_HELPER_4(cksm, i64, env, i64, i64, i64)
19
+DEF_HELPER_4(cksm, i128, env, i64, i64, i64)
20
DEF_HELPER_FLAGS_5(calc_cc, TCG_CALL_NO_RWG_SE, i32, env, i32, i64, i64, i64)
21
DEF_HELPER_FLAGS_2(sfpc, TCG_CALL_NO_WG, void, env, i64)
22
DEF_HELPER_FLAGS_2(sfas, TCG_CALL_NO_WG, void, env, i64)
23
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/target/s390x/tcg/mem_helper.c
26
+++ b/target/s390x/tcg/mem_helper.c
27
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
28
}
29
30
/* checksum */
31
-uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
32
- uint64_t src, uint64_t src_len)
33
+Int128 HELPER(cksm)(CPUS390XState *env, uint64_t r1,
34
+ uint64_t src, uint64_t src_len)
35
{
36
uintptr_t ra = GETPC();
37
uint64_t max_len, len;
38
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
39
env->cc_op = (len == src_len ? 0 : 3);
40
41
/* Return both cksm and processed length. */
42
- env->retxl = cksm;
43
- return len;
44
+ return int128_make128(cksm, len);
45
}
46
47
void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
48
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/target/s390x/tcg/translate.c
51
+++ b/target/s390x/tcg/translate.c
52
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cxlgb(DisasContext *s, DisasOps *o)
53
static DisasJumpType op_cksm(DisasContext *s, DisasOps *o)
54
{
55
int r2 = get_field(s, r2);
56
+ TCGv_i128 pair = tcg_temp_new_i128();
57
TCGv_i64 len = tcg_temp_new_i64();
58
59
- gen_helper_cksm(len, cpu_env, o->in1, o->in2, regs[r2 + 1]);
60
+ gen_helper_cksm(pair, cpu_env, o->in1, o->in2, regs[r2 + 1]);
61
set_cc_static(s);
62
- return_low128(o->out);
63
+ tcg_gen_extr_i128_i64(o->out, len, pair);
64
+ tcg_temp_free_i128(pair);
65
66
tcg_gen_add_i64(regs[r2], regs[r2], len);
67
tcg_gen_sub_i64(regs[r2 + 1], regs[r2 + 1], len);
68
--
69
2.34.1
70
71
diff view generated by jsdifflib
New patch
1
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
2
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
target/s390x/helper.h | 2 +-
6
target/s390x/tcg/mem_helper.c | 7 +++----
7
target/s390x/tcg/translate.c | 7 +++++--
8
3 files changed, 9 insertions(+), 7 deletions(-)
1
9
10
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/s390x/helper.h
13
+++ b/target/s390x/helper.h
14
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(unpka, TCG_CALL_NO_WG, i32, env, i64, i32, i64)
15
DEF_HELPER_FLAGS_4(unpku, TCG_CALL_NO_WG, i32, env, i64, i32, i64)
16
DEF_HELPER_FLAGS_3(tp, TCG_CALL_NO_WG, i32, env, i64, i32)
17
DEF_HELPER_FLAGS_4(tr, TCG_CALL_NO_WG, void, env, i32, i64, i64)
18
-DEF_HELPER_4(tre, i64, env, i64, i64, i64)
19
+DEF_HELPER_4(tre, i128, env, i64, i64, i64)
20
DEF_HELPER_4(trt, i32, env, i32, i64, i64)
21
DEF_HELPER_4(trtr, i32, env, i32, i64, i64)
22
DEF_HELPER_5(trXX, i32, env, i32, i32, i32, i32)
23
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/target/s390x/tcg/mem_helper.c
26
+++ b/target/s390x/tcg/mem_helper.c
27
@@ -XXX,XX +XXX,XX @@ void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
28
do_helper_tr(env, len, array, trans, GETPC());
29
}
30
31
-uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
32
- uint64_t len, uint64_t trans)
33
+Int128 HELPER(tre)(CPUS390XState *env, uint64_t array,
34
+ uint64_t len, uint64_t trans)
35
{
36
uintptr_t ra = GETPC();
37
uint8_t end = env->regs[0] & 0xff;
38
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
39
}
40
41
env->cc_op = cc;
42
- env->retxl = len - i;
43
- return array + i;
44
+ return int128_make128(len - i, array + i);
45
}
46
47
static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
48
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/target/s390x/tcg/translate.c
51
+++ b/target/s390x/tcg/translate.c
52
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_tr(DisasContext *s, DisasOps *o)
53
54
static DisasJumpType op_tre(DisasContext *s, DisasOps *o)
55
{
56
- gen_helper_tre(o->out, cpu_env, o->out, o->out2, o->in2);
57
- return_low128(o->out2);
58
+ TCGv_i128 pair = tcg_temp_new_i128();
59
+
60
+ gen_helper_tre(pair, cpu_env, o->out, o->out2, o->in2);
61
+ tcg_gen_extr_i128_i64(o->out2, o->out, pair);
62
+ tcg_temp_free_i128(pair);
63
set_cc_static(s);
64
return DISAS_NEXT;
65
}
66
--
67
2.34.1
68
69
diff view generated by jsdifflib
New patch
1
Make a copy of wout_x1 before modifying it, named wout_x1_P to
2
emphasize that it operates on the out/out2 pair. The insns that
3
use x1_P are data-movement insns that will not change to Int128.
1
4
5
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
target/s390x/tcg/insn-data.h.inc | 12 ++++++------
10
target/s390x/tcg/translate.c | 8 ++++++++
11
2 files changed, 14 insertions(+), 6 deletions(-)
12
13
diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/s390x/tcg/insn-data.h.inc
16
+++ b/target/s390x/tcg/insn-data.h.inc
17
@@ -XXX,XX +XXX,XX @@
18
F(0x3800, LER, RR_a, Z, 0, e2, 0, cond_e1e2, mov2, 0, IF_AFP1 | IF_AFP2)
19
F(0x7800, LE, RX_a, Z, 0, m2_32u, 0, e1, mov2, 0, IF_AFP1)
20
F(0xed64, LEY, RXY_a, LD, 0, m2_32u, 0, e1, mov2, 0, IF_AFP1)
21
- F(0xb365, LXR, RRE, Z, x2h, x2l, 0, x1, movx, 0, IF_AFP1)
22
+ F(0xb365, LXR, RRE, Z, x2h, x2l, 0, x1_P, movx, 0, IF_AFP1)
23
/* LOAD IMMEDIATE */
24
C(0xc001, LGFI, RIL_a, EI, 0, i2, 0, r1, mov2, 0)
25
/* LOAD RELATIVE LONG */
26
@@ -XXX,XX +XXX,XX @@
27
C(0xe332, LTGF, RXY_a, GIE, 0, a2, r1, 0, ld32s, s64)
28
F(0xb302, LTEBR, RRE, Z, 0, e2, 0, cond_e1e2, mov2, f32, IF_BFP)
29
F(0xb312, LTDBR, RRE, Z, 0, f2, 0, f1, mov2, f64, IF_BFP)
30
- F(0xb342, LTXBR, RRE, Z, x2h, x2l, 0, x1, movx, f128, IF_BFP)
31
+ F(0xb342, LTXBR, RRE, Z, x2h, x2l, 0, x1_P, movx, f128, IF_BFP)
32
/* LOAD AND TRAP */
33
C(0xe39f, LAT, RXY_a, LAT, 0, m2_32u, r1, 0, lat, 0)
34
C(0xe385, LGAT, RXY_a, LAT, 0, a2, r1, 0, lgat, 0)
35
@@ -XXX,XX +XXX,XX @@
36
C(0xb913, LCGFR, RRE, Z, 0, r2_32s, r1, 0, neg, neg64)
37
F(0xb303, LCEBR, RRE, Z, 0, e2, new, e1, negf32, f32, IF_BFP)
38
F(0xb313, LCDBR, RRE, Z, 0, f2, new, f1, negf64, f64, IF_BFP)
39
- F(0xb343, LCXBR, RRE, Z, x2h, x2l, new_P, x1, negf128, f128, IF_BFP)
40
+ F(0xb343, LCXBR, RRE, Z, x2h, x2l, new_P, x1_P, negf128, f128, IF_BFP)
41
F(0xb373, LCDFR, RRE, FPSSH, 0, f2, new, f1, negf64, 0, IF_AFP1 | IF_AFP2)
42
/* LOAD COUNT TO BLOCK BOUNDARY */
43
C(0xe727, LCBB, RXE, V, la2, 0, r1, 0, lcbb, 0)
44
@@ -XXX,XX +XXX,XX @@
45
C(0xb911, LNGFR, RRE, Z, 0, r2_32s, r1, 0, nabs, nabs64)
46
F(0xb301, LNEBR, RRE, Z, 0, e2, new, e1, nabsf32, f32, IF_BFP)
47
F(0xb311, LNDBR, RRE, Z, 0, f2, new, f1, nabsf64, f64, IF_BFP)
48
- F(0xb341, LNXBR, RRE, Z, x2h, x2l, new_P, x1, nabsf128, f128, IF_BFP)
49
+ F(0xb341, LNXBR, RRE, Z, x2h, x2l, new_P, x1_P, nabsf128, f128, IF_BFP)
50
F(0xb371, LNDFR, RRE, FPSSH, 0, f2, new, f1, nabsf64, 0, IF_AFP1 | IF_AFP2)
51
/* LOAD ON CONDITION */
52
C(0xb9f2, LOCR, RRF_c, LOC, r1, r2, new, r1_32, loc, 0)
53
@@ -XXX,XX +XXX,XX @@
54
C(0xb910, LPGFR, RRE, Z, 0, r2_32s, r1, 0, abs, abs64)
55
F(0xb300, LPEBR, RRE, Z, 0, e2, new, e1, absf32, f32, IF_BFP)
56
F(0xb310, LPDBR, RRE, Z, 0, f2, new, f1, absf64, f64, IF_BFP)
57
- F(0xb340, LPXBR, RRE, Z, x2h, x2l, new_P, x1, absf128, f128, IF_BFP)
58
+ F(0xb340, LPXBR, RRE, Z, x2h, x2l, new_P, x1_P, absf128, f128, IF_BFP)
59
F(0xb370, LPDFR, RRE, FPSSH, 0, f2, new, f1, absf64, 0, IF_AFP1 | IF_AFP2)
60
/* LOAD REVERSED */
61
C(0xb91f, LRVR, RRE, Z, 0, r2_32u, new, r1_32, rev32, 0)
62
@@ -XXX,XX +XXX,XX @@
63
/* LOAD ZERO */
64
F(0xb374, LZER, RRE, Z, 0, 0, 0, e1, zero, 0, IF_AFP1)
65
F(0xb375, LZDR, RRE, Z, 0, 0, 0, f1, zero, 0, IF_AFP1)
66
- F(0xb376, LZXR, RRE, Z, 0, 0, 0, x1, zero2, 0, IF_AFP1)
67
+ F(0xb376, LZXR, RRE, Z, 0, 0, 0, x1_P, zero2, 0, IF_AFP1)
68
69
/* LOAD FPC */
70
F(0xb29d, LFPC, S, Z, 0, m2_32u, 0, 0, sfpc, 0, IF_BFP)
71
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
72
index XXXXXXX..XXXXXXX 100644
73
--- a/target/s390x/tcg/translate.c
74
+++ b/target/s390x/tcg/translate.c
75
@@ -XXX,XX +XXX,XX @@ static void wout_x1(DisasContext *s, DisasOps *o)
76
}
77
#define SPEC_wout_x1 SPEC_r1_f128
78
79
+static void wout_x1_P(DisasContext *s, DisasOps *o)
80
+{
81
+ int f1 = get_field(s, r1);
82
+ store_freg(f1, o->out);
83
+ store_freg(f1 + 2, o->out2);
84
+}
85
+#define SPEC_wout_x1_P SPEC_r1_f128
86
+
87
static void wout_cond_r1r2_32(DisasContext *s, DisasOps *o)
88
{
89
if (get_field(s, r1) != get_field(s, r2)) {
90
--
91
2.34.1
92
93
diff view generated by jsdifflib
New patch
1
Acked-by: David Hildenbrand <david@redhat.com>
2
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
v2: Remove extraneous return_low128.
6
---
7
target/s390x/helper.h | 22 +++++++-------
8
target/s390x/tcg/insn-data.h.inc | 20 ++++++-------
9
target/s390x/tcg/fpu_helper.c | 29 +++++++++---------
10
target/s390x/tcg/translate.c | 51 +++++++++++++++++---------------
11
4 files changed, 63 insertions(+), 59 deletions(-)
1
12
13
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/s390x/helper.h
16
+++ b/target/s390x/helper.h
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(clcle, i32, env, i32, i64, i32)
18
DEF_HELPER_4(clclu, i32, env, i32, i64, i32)
19
DEF_HELPER_3(cegb, i64, env, s64, i32)
20
DEF_HELPER_3(cdgb, i64, env, s64, i32)
21
-DEF_HELPER_3(cxgb, i64, env, s64, i32)
22
+DEF_HELPER_3(cxgb, i128, env, s64, i32)
23
DEF_HELPER_3(celgb, i64, env, i64, i32)
24
DEF_HELPER_3(cdlgb, i64, env, i64, i32)
25
-DEF_HELPER_3(cxlgb, i64, env, i64, i32)
26
+DEF_HELPER_3(cxlgb, i128, env, i64, i32)
27
DEF_HELPER_4(cdsg, void, env, i64, i32, i32)
28
DEF_HELPER_4(cdsg_parallel, void, env, i64, i32, i32)
29
DEF_HELPER_4(csst, i32, env, i32, i64, i64)
30
DEF_HELPER_4(csst_parallel, i32, env, i32, i64, i64)
31
DEF_HELPER_FLAGS_3(aeb, TCG_CALL_NO_WG, i64, env, i64, i64)
32
DEF_HELPER_FLAGS_3(adb, TCG_CALL_NO_WG, i64, env, i64, i64)
33
-DEF_HELPER_FLAGS_5(axb, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i64)
34
+DEF_HELPER_FLAGS_5(axb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64)
35
DEF_HELPER_FLAGS_3(seb, TCG_CALL_NO_WG, i64, env, i64, i64)
36
DEF_HELPER_FLAGS_3(sdb, TCG_CALL_NO_WG, i64, env, i64, i64)
37
-DEF_HELPER_FLAGS_5(sxb, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i64)
38
+DEF_HELPER_FLAGS_5(sxb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64)
39
DEF_HELPER_FLAGS_3(deb, TCG_CALL_NO_WG, i64, env, i64, i64)
40
DEF_HELPER_FLAGS_3(ddb, TCG_CALL_NO_WG, i64, env, i64, i64)
41
-DEF_HELPER_FLAGS_5(dxb, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i64)
42
+DEF_HELPER_FLAGS_5(dxb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64)
43
DEF_HELPER_FLAGS_3(meeb, TCG_CALL_NO_WG, i64, env, i64, i64)
44
DEF_HELPER_FLAGS_3(mdeb, TCG_CALL_NO_WG, i64, env, i64, i64)
45
DEF_HELPER_FLAGS_3(mdb, TCG_CALL_NO_WG, i64, env, i64, i64)
46
-DEF_HELPER_FLAGS_5(mxb, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i64)
47
-DEF_HELPER_FLAGS_4(mxdb, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
48
+DEF_HELPER_FLAGS_5(mxb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64)
49
+DEF_HELPER_FLAGS_4(mxdb, TCG_CALL_NO_WG, i128, env, i64, i64, i64)
50
DEF_HELPER_FLAGS_2(ldeb, TCG_CALL_NO_WG, i64, env, i64)
51
DEF_HELPER_FLAGS_4(ldxb, TCG_CALL_NO_WG, i64, env, i64, i64, i32)
52
-DEF_HELPER_FLAGS_2(lxdb, TCG_CALL_NO_WG, i64, env, i64)
53
-DEF_HELPER_FLAGS_2(lxeb, TCG_CALL_NO_WG, i64, env, i64)
54
+DEF_HELPER_FLAGS_2(lxdb, TCG_CALL_NO_WG, i128, env, i64)
55
+DEF_HELPER_FLAGS_2(lxeb, TCG_CALL_NO_WG, i128, env, i64)
56
DEF_HELPER_FLAGS_3(ledb, TCG_CALL_NO_WG, i64, env, i64, i32)
57
DEF_HELPER_FLAGS_4(lexb, TCG_CALL_NO_WG, i64, env, i64, i64, i32)
58
DEF_HELPER_FLAGS_3(ceb, TCG_CALL_NO_WG_SE, i32, env, i64, i64)
59
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(clfdb, i64, env, i64, i32)
60
DEF_HELPER_4(clfxb, i64, env, i64, i64, i32)
61
DEF_HELPER_FLAGS_3(fieb, TCG_CALL_NO_WG, i64, env, i64, i32)
62
DEF_HELPER_FLAGS_3(fidb, TCG_CALL_NO_WG, i64, env, i64, i32)
63
-DEF_HELPER_FLAGS_4(fixb, TCG_CALL_NO_WG, i64, env, i64, i64, i32)
64
+DEF_HELPER_FLAGS_4(fixb, TCG_CALL_NO_WG, i128, env, i64, i64, i32)
65
DEF_HELPER_FLAGS_4(maeb, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
66
DEF_HELPER_FLAGS_4(madb, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
67
DEF_HELPER_FLAGS_4(mseb, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
68
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(tcdb, TCG_CALL_NO_RWG_SE, i32, env, i64, i64)
69
DEF_HELPER_FLAGS_4(tcxb, TCG_CALL_NO_RWG_SE, i32, env, i64, i64, i64)
70
DEF_HELPER_FLAGS_2(sqeb, TCG_CALL_NO_WG, i64, env, i64)
71
DEF_HELPER_FLAGS_2(sqdb, TCG_CALL_NO_WG, i64, env, i64)
72
-DEF_HELPER_FLAGS_3(sqxb, TCG_CALL_NO_WG, i64, env, i64, i64)
73
+DEF_HELPER_FLAGS_3(sqxb, TCG_CALL_NO_WG, i128, env, i64, i64)
74
DEF_HELPER_FLAGS_1(cvd, TCG_CALL_NO_RWG_SE, i64, s32)
75
DEF_HELPER_FLAGS_4(pack, TCG_CALL_NO_WG, void, env, i32, i64, i64)
76
DEF_HELPER_FLAGS_4(pka, TCG_CALL_NO_WG, void, env, i64, i64, i32)
77
diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
78
index XXXXXXX..XXXXXXX 100644
79
--- a/target/s390x/tcg/insn-data.h.inc
80
+++ b/target/s390x/tcg/insn-data.h.inc
81
@@ -XXX,XX +XXX,XX @@
82
/* CONVERT FROM FIXED */
83
F(0xb394, CEFBR, RRF_e, Z, 0, r2_32s, new, e1, cegb, 0, IF_BFP)
84
F(0xb395, CDFBR, RRF_e, Z, 0, r2_32s, new, f1, cdgb, 0, IF_BFP)
85
- F(0xb396, CXFBR, RRF_e, Z, 0, r2_32s, new_P, x1, cxgb, 0, IF_BFP)
86
+ F(0xb396, CXFBR, RRF_e, Z, 0, r2_32s, new_x, x1, cxgb, 0, IF_BFP)
87
F(0xb3a4, CEGBR, RRF_e, Z, 0, r2_o, new, e1, cegb, 0, IF_BFP)
88
F(0xb3a5, CDGBR, RRF_e, Z, 0, r2_o, new, f1, cdgb, 0, IF_BFP)
89
- F(0xb3a6, CXGBR, RRF_e, Z, 0, r2_o, new_P, x1, cxgb, 0, IF_BFP)
90
+ F(0xb3a6, CXGBR, RRF_e, Z, 0, r2_o, new_x, x1, cxgb, 0, IF_BFP)
91
/* CONVERT TO LOGICAL */
92
F(0xb39c, CLFEBR, RRF_e, FPE, 0, e2, new, r1_32, clfeb, 0, IF_BFP)
93
F(0xb39d, CLFDBR, RRF_e, FPE, 0, f2, new, r1_32, clfdb, 0, IF_BFP)
94
@@ -XXX,XX +XXX,XX @@
95
/* CONVERT FROM LOGICAL */
96
F(0xb390, CELFBR, RRF_e, FPE, 0, r2_32u, new, e1, celgb, 0, IF_BFP)
97
F(0xb391, CDLFBR, RRF_e, FPE, 0, r2_32u, new, f1, cdlgb, 0, IF_BFP)
98
- F(0xb392, CXLFBR, RRF_e, FPE, 0, r2_32u, new_P, x1, cxlgb, 0, IF_BFP)
99
+ F(0xb392, CXLFBR, RRF_e, FPE, 0, r2_32u, new_x, x1, cxlgb, 0, IF_BFP)
100
F(0xb3a0, CELGBR, RRF_e, FPE, 0, r2_o, new, e1, celgb, 0, IF_BFP)
101
F(0xb3a1, CDLGBR, RRF_e, FPE, 0, r2_o, new, f1, cdlgb, 0, IF_BFP)
102
- F(0xb3a2, CXLGBR, RRF_e, FPE, 0, r2_o, new_P, x1, cxlgb, 0, IF_BFP)
103
+ F(0xb3a2, CXLGBR, RRF_e, FPE, 0, r2_o, new_x, x1, cxlgb, 0, IF_BFP)
104
105
/* CONVERT UTF-8 TO UTF-16 */
106
D(0xb2a7, CU12, RRF_c, Z, 0, 0, 0, 0, cuXX, 0, 12)
107
@@ -XXX,XX +XXX,XX @@
108
/* LOAD FP INTEGER */
109
F(0xb357, FIEBR, RRF_e, Z, 0, e2, new, e1, fieb, 0, IF_BFP)
110
F(0xb35f, FIDBR, RRF_e, Z, 0, f2, new, f1, fidb, 0, IF_BFP)
111
- F(0xb347, FIXBR, RRF_e, Z, x2h, x2l, new_P, x1, fixb, 0, IF_BFP)
112
+ F(0xb347, FIXBR, RRF_e, Z, x2h, x2l, new_x, x1, fixb, 0, IF_BFP)
113
114
/* LOAD LENGTHENED */
115
F(0xb304, LDEBR, RRE, Z, 0, e2, new, f1, ldeb, 0, IF_BFP)
116
- F(0xb305, LXDBR, RRE, Z, 0, f2, new_P, x1, lxdb, 0, IF_BFP)
117
- F(0xb306, LXEBR, RRE, Z, 0, e2, new_P, x1, lxeb, 0, IF_BFP)
118
+ F(0xb305, LXDBR, RRE, Z, 0, f2, new_x, x1, lxdb, 0, IF_BFP)
119
+ F(0xb306, LXEBR, RRE, Z, 0, e2, new_x, x1, lxeb, 0, IF_BFP)
120
F(0xed04, LDEB, RXE, Z, 0, m2_32u, new, f1, ldeb, 0, IF_BFP)
121
- F(0xed05, LXDB, RXE, Z, 0, m2_64, new_P, x1, lxdb, 0, IF_BFP)
122
- F(0xed06, LXEB, RXE, Z, 0, m2_32u, new_P, x1, lxeb, 0, IF_BFP)
123
+ F(0xed05, LXDB, RXE, Z, 0, m2_64, new_x, x1, lxdb, 0, IF_BFP)
124
+ F(0xed06, LXEB, RXE, Z, 0, m2_32u, new_x, x1, lxeb, 0, IF_BFP)
125
F(0xb324, LDER, RXE, Z, 0, e2, new, f1, lde, 0, IF_AFP1)
126
F(0xed24, LDE, RXE, Z, 0, m2_32u, new, f1, lde, 0, IF_AFP1)
127
/* LOAD ROUNDED */
128
@@ -XXX,XX +XXX,XX @@
129
/* SQUARE ROOT */
130
F(0xb314, SQEBR, RRE, Z, 0, e2, new, e1, sqeb, 0, IF_BFP)
131
F(0xb315, SQDBR, RRE, Z, 0, f2, new, f1, sqdb, 0, IF_BFP)
132
- F(0xb316, SQXBR, RRE, Z, x2h, x2l, new_P, x1, sqxb, 0, IF_BFP)
133
+ F(0xb316, SQXBR, RRE, Z, x2h, x2l, new_x, x1, sqxb, 0, IF_BFP)
134
F(0xed14, SQEB, RXE, Z, 0, m2_32u, new, e1, sqeb, 0, IF_BFP)
135
F(0xed15, SQDB, RXE, Z, 0, m2_64, new, f1, sqdb, 0, IF_BFP)
136
137
diff --git a/target/s390x/tcg/fpu_helper.c b/target/s390x/tcg/fpu_helper.c
138
index XXXXXXX..XXXXXXX 100644
139
--- a/target/s390x/tcg/fpu_helper.c
140
+++ b/target/s390x/tcg/fpu_helper.c
141
@@ -XXX,XX +XXX,XX @@
142
#define HELPER_LOG(x...)
143
#endif
144
145
-#define RET128(F) (env->retxl = F.low, F.high)
146
+static inline Int128 RET128(float128 f)
147
+{
148
+ return int128_make128(f.low, f.high);
149
+}
150
151
uint8_t s390_softfloat_exc_to_ieee(unsigned int exc)
152
{
153
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(adb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
154
}
155
156
/* 128-bit FP addition */
157
-uint64_t HELPER(axb)(CPUS390XState *env, uint64_t ah, uint64_t al,
158
+Int128 HELPER(axb)(CPUS390XState *env, uint64_t ah, uint64_t al,
159
uint64_t bh, uint64_t bl)
160
{
161
float128 ret = float128_add(make_float128(ah, al),
162
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(sdb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
163
}
164
165
/* 128-bit FP subtraction */
166
-uint64_t HELPER(sxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
167
+Int128 HELPER(sxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
168
uint64_t bh, uint64_t bl)
169
{
170
float128 ret = float128_sub(make_float128(ah, al),
171
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(ddb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
172
}
173
174
/* 128-bit FP division */
175
-uint64_t HELPER(dxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
176
+Int128 HELPER(dxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
177
uint64_t bh, uint64_t bl)
178
{
179
float128 ret = float128_div(make_float128(ah, al),
180
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(mdeb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
181
}
182
183
/* 128-bit FP multiplication */
184
-uint64_t HELPER(mxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
185
+Int128 HELPER(mxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
186
uint64_t bh, uint64_t bl)
187
{
188
float128 ret = float128_mul(make_float128(ah, al),
189
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(mxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
190
}
191
192
/* 128/64-bit FP multiplication */
193
-uint64_t HELPER(mxdb)(CPUS390XState *env, uint64_t ah, uint64_t al,
194
- uint64_t f2)
195
+Int128 HELPER(mxdb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t f2)
196
{
197
float128 ret = float64_to_float128(f2, &env->fpu_status);
198
ret = float128_mul(make_float128(ah, al), ret, &env->fpu_status);
199
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(ldxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
200
}
201
202
/* convert 64-bit float to 128-bit float */
203
-uint64_t HELPER(lxdb)(CPUS390XState *env, uint64_t f2)
204
+Int128 HELPER(lxdb)(CPUS390XState *env, uint64_t f2)
205
{
206
float128 ret = float64_to_float128(f2, &env->fpu_status);
207
handle_exceptions(env, false, GETPC());
208
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(lxdb)(CPUS390XState *env, uint64_t f2)
209
}
210
211
/* convert 32-bit float to 128-bit float */
212
-uint64_t HELPER(lxeb)(CPUS390XState *env, uint64_t f2)
213
+Int128 HELPER(lxeb)(CPUS390XState *env, uint64_t f2)
214
{
215
float128 ret = float32_to_float128(f2, &env->fpu_status);
216
handle_exceptions(env, false, GETPC());
217
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(cdgb)(CPUS390XState *env, int64_t v2, uint32_t m34)
218
}
219
220
/* convert 64-bit int to 128-bit float */
221
-uint64_t HELPER(cxgb)(CPUS390XState *env, int64_t v2, uint32_t m34)
222
+Int128 HELPER(cxgb)(CPUS390XState *env, int64_t v2, uint32_t m34)
223
{
224
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
225
float128 ret = int64_to_float128(v2, &env->fpu_status);
226
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(cdlgb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
227
}
228
229
/* convert 64-bit uint to 128-bit float */
230
-uint64_t HELPER(cxlgb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
231
+Int128 HELPER(cxlgb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
232
{
233
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
234
float128 ret = uint64_to_float128(v2, &env->fpu_status);
235
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(fidb)(CPUS390XState *env, uint64_t f2, uint32_t m34)
236
}
237
238
/* round to integer 128-bit */
239
-uint64_t HELPER(fixb)(CPUS390XState *env, uint64_t ah, uint64_t al,
240
- uint32_t m34)
241
+Int128 HELPER(fixb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint32_t m34)
242
{
243
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
244
float128 ret = float128_round_to_int(make_float128(ah, al),
245
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(sqdb)(CPUS390XState *env, uint64_t f2)
246
}
247
248
/* square root 128-bit */
249
-uint64_t HELPER(sqxb)(CPUS390XState *env, uint64_t ah, uint64_t al)
250
+Int128 HELPER(sqxb)(CPUS390XState *env, uint64_t ah, uint64_t al)
251
{
252
float128 ret = float128_sqrt(make_float128(ah, al), &env->fpu_status);
253
handle_exceptions(env, false, GETPC());
254
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
255
index XXXXXXX..XXXXXXX 100644
256
--- a/target/s390x/tcg/translate.c
257
+++ b/target/s390x/tcg/translate.c
258
@@ -XXX,XX +XXX,XX @@ typedef struct {
259
bool g_out, g_out2, g_in1, g_in2;
260
TCGv_i64 out, out2, in1, in2;
261
TCGv_i64 addr1;
262
+ TCGv_i128 out_128;
263
} DisasOps;
264
265
/* Instructions can place constraints on their operands, raising specification
266
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_adb(DisasContext *s, DisasOps *o)
267
268
static DisasJumpType op_axb(DisasContext *s, DisasOps *o)
269
{
270
- gen_helper_axb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2);
271
- return_low128(o->out2);
272
+ gen_helper_axb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2);
273
return DISAS_NEXT;
274
}
275
276
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cxgb(DisasContext *s, DisasOps *o)
277
if (!m34) {
278
return DISAS_NORETURN;
279
}
280
- gen_helper_cxgb(o->out, cpu_env, o->in2, m34);
281
+ gen_helper_cxgb(o->out_128, cpu_env, o->in2, m34);
282
tcg_temp_free_i32(m34);
283
- return_low128(o->out2);
284
return DISAS_NEXT;
285
}
286
287
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cxlgb(DisasContext *s, DisasOps *o)
288
if (!m34) {
289
return DISAS_NORETURN;
290
}
291
- gen_helper_cxlgb(o->out, cpu_env, o->in2, m34);
292
+ gen_helper_cxlgb(o->out_128, cpu_env, o->in2, m34);
293
tcg_temp_free_i32(m34);
294
- return_low128(o->out2);
295
return DISAS_NEXT;
296
}
297
298
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_ddb(DisasContext *s, DisasOps *o)
299
300
static DisasJumpType op_dxb(DisasContext *s, DisasOps *o)
301
{
302
- gen_helper_dxb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2);
303
- return_low128(o->out2);
304
+ gen_helper_dxb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2);
305
return DISAS_NEXT;
306
}
307
308
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_fixb(DisasContext *s, DisasOps *o)
309
if (!m34) {
310
return DISAS_NORETURN;
311
}
312
- gen_helper_fixb(o->out, cpu_env, o->in1, o->in2, m34);
313
- return_low128(o->out2);
314
+ gen_helper_fixb(o->out_128, cpu_env, o->in1, o->in2, m34);
315
tcg_temp_free_i32(m34);
316
return DISAS_NEXT;
317
}
318
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lexb(DisasContext *s, DisasOps *o)
319
320
static DisasJumpType op_lxdb(DisasContext *s, DisasOps *o)
321
{
322
- gen_helper_lxdb(o->out, cpu_env, o->in2);
323
- return_low128(o->out2);
324
+ gen_helper_lxdb(o->out_128, cpu_env, o->in2);
325
return DISAS_NEXT;
326
}
327
328
static DisasJumpType op_lxeb(DisasContext *s, DisasOps *o)
329
{
330
- gen_helper_lxeb(o->out, cpu_env, o->in2);
331
- return_low128(o->out2);
332
+ gen_helper_lxeb(o->out_128, cpu_env, o->in2);
333
return DISAS_NEXT;
334
}
335
336
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_mdb(DisasContext *s, DisasOps *o)
337
338
static DisasJumpType op_mxb(DisasContext *s, DisasOps *o)
339
{
340
- gen_helper_mxb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2);
341
- return_low128(o->out2);
342
+ gen_helper_mxb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2);
343
return DISAS_NEXT;
344
}
345
346
static DisasJumpType op_mxdb(DisasContext *s, DisasOps *o)
347
{
348
- gen_helper_mxdb(o->out, cpu_env, o->out, o->out2, o->in2);
349
- return_low128(o->out2);
350
+ gen_helper_mxdb(o->out_128, cpu_env, o->out, o->out2, o->in2);
351
return DISAS_NEXT;
352
}
353
354
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_sdb(DisasContext *s, DisasOps *o)
355
356
static DisasJumpType op_sxb(DisasContext *s, DisasOps *o)
357
{
358
- gen_helper_sxb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2);
359
- return_low128(o->out2);
360
+ gen_helper_sxb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2);
361
return DISAS_NEXT;
362
}
363
364
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_sqdb(DisasContext *s, DisasOps *o)
365
366
static DisasJumpType op_sqxb(DisasContext *s, DisasOps *o)
367
{
368
- gen_helper_sqxb(o->out, cpu_env, o->in1, o->in2);
369
- return_low128(o->out2);
370
+ gen_helper_sqxb(o->out_128, cpu_env, o->in1, o->in2);
371
return DISAS_NEXT;
372
}
373
374
@@ -XXX,XX +XXX,XX @@ static void prep_new_P(DisasContext *s, DisasOps *o)
375
}
376
#define SPEC_prep_new_P 0
377
378
+static void prep_new_x(DisasContext *s, DisasOps *o)
379
+{
380
+ o->out = tcg_temp_new_i64();
381
+ o->out2 = tcg_temp_new_i64();
382
+ o->out_128 = tcg_temp_new_i128();
383
+}
384
+#define SPEC_prep_new_x 0
385
+
386
static void prep_r1(DisasContext *s, DisasOps *o)
387
{
388
o->out = regs[get_field(s, r1)];
389
@@ -XXX,XX +XXX,XX @@ static void prep_r1_P(DisasContext *s, DisasOps *o)
390
}
391
#define SPEC_prep_r1_P SPEC_r1_even
392
393
-/* Whenever we need x1 in addition to other inputs, we'll load it to out/out2 */
394
static void prep_x1(DisasContext *s, DisasOps *o)
395
{
396
o->out = load_freg(get_field(s, r1));
397
o->out2 = load_freg(get_field(s, r1) + 2);
398
+ o->out_128 = tcg_temp_new_i128();
399
+ tcg_gen_concat_i64_i128(o->out_128, o->out2, o->out);
400
}
401
#define SPEC_prep_x1 SPEC_r1_f128
402
403
@@ -XXX,XX +XXX,XX @@ static void wout_f1(DisasContext *s, DisasOps *o)
404
static void wout_x1(DisasContext *s, DisasOps *o)
405
{
406
int f1 = get_field(s, r1);
407
+
408
+ tcg_gen_extr_i128_i64(o->out2, o->out, o->out_128);
409
store_freg(f1, o->out);
410
store_freg(f1 + 2, o->out2);
411
}
412
@@ -XXX,XX +XXX,XX @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s)
413
if (o.addr1) {
414
tcg_temp_free_i64(o.addr1);
415
}
416
-
417
+ if (o.out_128) {
418
+ tcg_temp_free_i128(o.out_128);
419
+ }
420
/* io should be the last instruction in tb when icount is enabled */
421
if (unlikely(icount && ret == DISAS_NEXT)) {
422
ret = DISAS_TOO_MANY;
423
--
424
2.34.1
425
426
diff view generated by jsdifflib
New patch
1
Acked-by: David Hildenbrand <david@redhat.com>
2
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
v2: Fix SPEC_in1_x1.
6
---
7
target/s390x/helper.h | 32 ++++++------
8
target/s390x/tcg/insn-data.h.inc | 30 +++++------
9
target/s390x/tcg/fpu_helper.c | 88 ++++++++++++++------------------
10
target/s390x/tcg/translate.c | 76 ++++++++++++++++++---------
11
4 files changed, 121 insertions(+), 105 deletions(-)
1
12
13
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/s390x/helper.h
16
+++ b/target/s390x/helper.h
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_4(csst, i32, env, i32, i64, i64)
18
DEF_HELPER_4(csst_parallel, i32, env, i32, i64, i64)
19
DEF_HELPER_FLAGS_3(aeb, TCG_CALL_NO_WG, i64, env, i64, i64)
20
DEF_HELPER_FLAGS_3(adb, TCG_CALL_NO_WG, i64, env, i64, i64)
21
-DEF_HELPER_FLAGS_5(axb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64)
22
+DEF_HELPER_FLAGS_3(axb, TCG_CALL_NO_WG, i128, env, i128, i128)
23
DEF_HELPER_FLAGS_3(seb, TCG_CALL_NO_WG, i64, env, i64, i64)
24
DEF_HELPER_FLAGS_3(sdb, TCG_CALL_NO_WG, i64, env, i64, i64)
25
-DEF_HELPER_FLAGS_5(sxb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64)
26
+DEF_HELPER_FLAGS_3(sxb, TCG_CALL_NO_WG, i128, env, i128, i128)
27
DEF_HELPER_FLAGS_3(deb, TCG_CALL_NO_WG, i64, env, i64, i64)
28
DEF_HELPER_FLAGS_3(ddb, TCG_CALL_NO_WG, i64, env, i64, i64)
29
-DEF_HELPER_FLAGS_5(dxb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64)
30
+DEF_HELPER_FLAGS_3(dxb, TCG_CALL_NO_WG, i128, env, i128, i128)
31
DEF_HELPER_FLAGS_3(meeb, TCG_CALL_NO_WG, i64, env, i64, i64)
32
DEF_HELPER_FLAGS_3(mdeb, TCG_CALL_NO_WG, i64, env, i64, i64)
33
DEF_HELPER_FLAGS_3(mdb, TCG_CALL_NO_WG, i64, env, i64, i64)
34
-DEF_HELPER_FLAGS_5(mxb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64)
35
-DEF_HELPER_FLAGS_4(mxdb, TCG_CALL_NO_WG, i128, env, i64, i64, i64)
36
+DEF_HELPER_FLAGS_3(mxb, TCG_CALL_NO_WG, i128, env, i128, i128)
37
+DEF_HELPER_FLAGS_3(mxdb, TCG_CALL_NO_WG, i128, env, i128, i64)
38
DEF_HELPER_FLAGS_2(ldeb, TCG_CALL_NO_WG, i64, env, i64)
39
-DEF_HELPER_FLAGS_4(ldxb, TCG_CALL_NO_WG, i64, env, i64, i64, i32)
40
+DEF_HELPER_FLAGS_3(ldxb, TCG_CALL_NO_WG, i64, env, i128, i32)
41
DEF_HELPER_FLAGS_2(lxdb, TCG_CALL_NO_WG, i128, env, i64)
42
DEF_HELPER_FLAGS_2(lxeb, TCG_CALL_NO_WG, i128, env, i64)
43
DEF_HELPER_FLAGS_3(ledb, TCG_CALL_NO_WG, i64, env, i64, i32)
44
-DEF_HELPER_FLAGS_4(lexb, TCG_CALL_NO_WG, i64, env, i64, i64, i32)
45
+DEF_HELPER_FLAGS_3(lexb, TCG_CALL_NO_WG, i64, env, i128, i32)
46
DEF_HELPER_FLAGS_3(ceb, TCG_CALL_NO_WG_SE, i32, env, i64, i64)
47
DEF_HELPER_FLAGS_3(cdb, TCG_CALL_NO_WG_SE, i32, env, i64, i64)
48
-DEF_HELPER_FLAGS_5(cxb, TCG_CALL_NO_WG_SE, i32, env, i64, i64, i64, i64)
49
+DEF_HELPER_FLAGS_3(cxb, TCG_CALL_NO_WG_SE, i32, env, i128, i128)
50
DEF_HELPER_FLAGS_3(keb, TCG_CALL_NO_WG, i32, env, i64, i64)
51
DEF_HELPER_FLAGS_3(kdb, TCG_CALL_NO_WG, i32, env, i64, i64)
52
-DEF_HELPER_FLAGS_5(kxb, TCG_CALL_NO_WG, i32, env, i64, i64, i64, i64)
53
+DEF_HELPER_FLAGS_3(kxb, TCG_CALL_NO_WG, i32, env, i128, i128)
54
DEF_HELPER_3(cgeb, i64, env, i64, i32)
55
DEF_HELPER_3(cgdb, i64, env, i64, i32)
56
-DEF_HELPER_4(cgxb, i64, env, i64, i64, i32)
57
+DEF_HELPER_3(cgxb, i64, env, i128, i32)
58
DEF_HELPER_3(cfeb, i64, env, i64, i32)
59
DEF_HELPER_3(cfdb, i64, env, i64, i32)
60
-DEF_HELPER_4(cfxb, i64, env, i64, i64, i32)
61
+DEF_HELPER_3(cfxb, i64, env, i128, i32)
62
DEF_HELPER_3(clgeb, i64, env, i64, i32)
63
DEF_HELPER_3(clgdb, i64, env, i64, i32)
64
-DEF_HELPER_4(clgxb, i64, env, i64, i64, i32)
65
+DEF_HELPER_3(clgxb, i64, env, i128, i32)
66
DEF_HELPER_3(clfeb, i64, env, i64, i32)
67
DEF_HELPER_3(clfdb, i64, env, i64, i32)
68
-DEF_HELPER_4(clfxb, i64, env, i64, i64, i32)
69
+DEF_HELPER_3(clfxb, i64, env, i128, i32)
70
DEF_HELPER_FLAGS_3(fieb, TCG_CALL_NO_WG, i64, env, i64, i32)
71
DEF_HELPER_FLAGS_3(fidb, TCG_CALL_NO_WG, i64, env, i64, i32)
72
-DEF_HELPER_FLAGS_4(fixb, TCG_CALL_NO_WG, i128, env, i64, i64, i32)
73
+DEF_HELPER_FLAGS_3(fixb, TCG_CALL_NO_WG, i128, env, i128, i32)
74
DEF_HELPER_FLAGS_4(maeb, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
75
DEF_HELPER_FLAGS_4(madb, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
76
DEF_HELPER_FLAGS_4(mseb, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
77
DEF_HELPER_FLAGS_4(msdb, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
78
DEF_HELPER_FLAGS_3(tceb, TCG_CALL_NO_RWG_SE, i32, env, i64, i64)
79
DEF_HELPER_FLAGS_3(tcdb, TCG_CALL_NO_RWG_SE, i32, env, i64, i64)
80
-DEF_HELPER_FLAGS_4(tcxb, TCG_CALL_NO_RWG_SE, i32, env, i64, i64, i64)
81
+DEF_HELPER_FLAGS_3(tcxb, TCG_CALL_NO_RWG_SE, i32, env, i128, i64)
82
DEF_HELPER_FLAGS_2(sqeb, TCG_CALL_NO_WG, i64, env, i64)
83
DEF_HELPER_FLAGS_2(sqdb, TCG_CALL_NO_WG, i64, env, i64)
84
-DEF_HELPER_FLAGS_3(sqxb, TCG_CALL_NO_WG, i128, env, i64, i64)
85
+DEF_HELPER_FLAGS_2(sqxb, TCG_CALL_NO_WG, i128, env, i128)
86
DEF_HELPER_FLAGS_1(cvd, TCG_CALL_NO_RWG_SE, i64, s32)
87
DEF_HELPER_FLAGS_4(pack, TCG_CALL_NO_WG, void, env, i32, i64, i64)
88
DEF_HELPER_FLAGS_4(pka, TCG_CALL_NO_WG, void, env, i64, i64, i32)
89
diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
90
index XXXXXXX..XXXXXXX 100644
91
--- a/target/s390x/tcg/insn-data.h.inc
92
+++ b/target/s390x/tcg/insn-data.h.inc
93
@@ -XXX,XX +XXX,XX @@
94
C(0xe318, AGF, RXY_a, Z, r1, m2_32s, r1, 0, add, adds64)
95
F(0xb30a, AEBR, RRE, Z, e1, e2, new, e1, aeb, f32, IF_BFP)
96
F(0xb31a, ADBR, RRE, Z, f1, f2, new, f1, adb, f64, IF_BFP)
97
- F(0xb34a, AXBR, RRE, Z, x2h, x2l, x1, x1, axb, f128, IF_BFP)
98
+ F(0xb34a, AXBR, RRE, Z, x1, x2, new_x, x1, axb, f128, IF_BFP)
99
F(0xed0a, AEB, RXE, Z, e1, m2_32u, new, e1, aeb, f32, IF_BFP)
100
F(0xed1a, ADB, RXE, Z, f1, m2_64, new, f1, adb, f64, IF_BFP)
101
/* ADD HIGH */
102
@@ -XXX,XX +XXX,XX @@
103
C(0xe330, CGF, RXY_a, Z, r1_o, m2_32s, 0, 0, 0, cmps64)
104
F(0xb309, CEBR, RRE, Z, e1, e2, 0, 0, ceb, 0, IF_BFP)
105
F(0xb319, CDBR, RRE, Z, f1, f2, 0, 0, cdb, 0, IF_BFP)
106
- F(0xb349, CXBR, RRE, Z, x2h, x2l, x1, 0, cxb, 0, IF_BFP)
107
+ F(0xb349, CXBR, RRE, Z, x1, x2, 0, 0, cxb, 0, IF_BFP)
108
F(0xed09, CEB, RXE, Z, e1, m2_32u, 0, 0, ceb, 0, IF_BFP)
109
F(0xed19, CDB, RXE, Z, f1, m2_64, 0, 0, cdb, 0, IF_BFP)
110
/* COMPARE AND SIGNAL */
111
F(0xb308, KEBR, RRE, Z, e1, e2, 0, 0, keb, 0, IF_BFP)
112
F(0xb318, KDBR, RRE, Z, f1, f2, 0, 0, kdb, 0, IF_BFP)
113
- F(0xb348, KXBR, RRE, Z, x2h, x2l, x1, 0, kxb, 0, IF_BFP)
114
+ F(0xb348, KXBR, RRE, Z, x1, x2, 0, 0, kxb, 0, IF_BFP)
115
F(0xed08, KEB, RXE, Z, e1, m2_32u, 0, 0, keb, 0, IF_BFP)
116
F(0xed18, KDB, RXE, Z, f1, m2_64, 0, 0, kdb, 0, IF_BFP)
117
/* COMPARE IMMEDIATE */
118
@@ -XXX,XX +XXX,XX @@
119
/* CONVERT TO FIXED */
120
F(0xb398, CFEBR, RRF_e, Z, 0, e2, new, r1_32, cfeb, 0, IF_BFP)
121
F(0xb399, CFDBR, RRF_e, Z, 0, f2, new, r1_32, cfdb, 0, IF_BFP)
122
- F(0xb39a, CFXBR, RRF_e, Z, x2h, x2l, new, r1_32, cfxb, 0, IF_BFP)
123
+ F(0xb39a, CFXBR, RRF_e, Z, 0, x2, new, r1_32, cfxb, 0, IF_BFP)
124
F(0xb3a8, CGEBR, RRF_e, Z, 0, e2, r1, 0, cgeb, 0, IF_BFP)
125
F(0xb3a9, CGDBR, RRF_e, Z, 0, f2, r1, 0, cgdb, 0, IF_BFP)
126
- F(0xb3aa, CGXBR, RRF_e, Z, x2h, x2l, r1, 0, cgxb, 0, IF_BFP)
127
+ F(0xb3aa, CGXBR, RRF_e, Z, 0, x2, r1, 0, cgxb, 0, IF_BFP)
128
/* CONVERT FROM FIXED */
129
F(0xb394, CEFBR, RRF_e, Z, 0, r2_32s, new, e1, cegb, 0, IF_BFP)
130
F(0xb395, CDFBR, RRF_e, Z, 0, r2_32s, new, f1, cdgb, 0, IF_BFP)
131
@@ -XXX,XX +XXX,XX @@
132
/* CONVERT TO LOGICAL */
133
F(0xb39c, CLFEBR, RRF_e, FPE, 0, e2, new, r1_32, clfeb, 0, IF_BFP)
134
F(0xb39d, CLFDBR, RRF_e, FPE, 0, f2, new, r1_32, clfdb, 0, IF_BFP)
135
- F(0xb39e, CLFXBR, RRF_e, FPE, x2h, x2l, new, r1_32, clfxb, 0, IF_BFP)
136
+ F(0xb39e, CLFXBR, RRF_e, FPE, 0, x2, new, r1_32, clfxb, 0, IF_BFP)
137
F(0xb3ac, CLGEBR, RRF_e, FPE, 0, e2, r1, 0, clgeb, 0, IF_BFP)
138
F(0xb3ad, CLGDBR, RRF_e, FPE, 0, f2, r1, 0, clgdb, 0, IF_BFP)
139
- F(0xb3ae, CLGXBR, RRF_e, FPE, x2h, x2l, r1, 0, clgxb, 0, IF_BFP)
140
+ F(0xb3ae, CLGXBR, RRF_e, FPE, 0, x2, r1, 0, clgxb, 0, IF_BFP)
141
/* CONVERT FROM LOGICAL */
142
F(0xb390, CELFBR, RRF_e, FPE, 0, r2_32u, new, e1, celgb, 0, IF_BFP)
143
F(0xb391, CDLFBR, RRF_e, FPE, 0, r2_32u, new, f1, cdlgb, 0, IF_BFP)
144
@@ -XXX,XX +XXX,XX @@
145
C(0x5d00, D, RX_a, Z, r1_D32, m2_32s, new_P, r1_P32, divs32, 0)
146
F(0xb30d, DEBR, RRE, Z, e1, e2, new, e1, deb, 0, IF_BFP)
147
F(0xb31d, DDBR, RRE, Z, f1, f2, new, f1, ddb, 0, IF_BFP)
148
- F(0xb34d, DXBR, RRE, Z, x2h, x2l, x1, x1, dxb, 0, IF_BFP)
149
+ F(0xb34d, DXBR, RRE, Z, x1, x2, new_x, x1, dxb, 0, IF_BFP)
150
F(0xed0d, DEB, RXE, Z, e1, m2_32u, new, e1, deb, 0, IF_BFP)
151
F(0xed1d, DDB, RXE, Z, f1, m2_64, new, f1, ddb, 0, IF_BFP)
152
/* DIVIDE LOGICAL */
153
@@ -XXX,XX +XXX,XX @@
154
/* LOAD FP INTEGER */
155
F(0xb357, FIEBR, RRF_e, Z, 0, e2, new, e1, fieb, 0, IF_BFP)
156
F(0xb35f, FIDBR, RRF_e, Z, 0, f2, new, f1, fidb, 0, IF_BFP)
157
- F(0xb347, FIXBR, RRF_e, Z, x2h, x2l, new_x, x1, fixb, 0, IF_BFP)
158
+ F(0xb347, FIXBR, RRF_e, Z, 0, x2, new_x, x1, fixb, 0, IF_BFP)
159
160
/* LOAD LENGTHENED */
161
F(0xb304, LDEBR, RRE, Z, 0, e2, new, f1, ldeb, 0, IF_BFP)
162
@@ -XXX,XX +XXX,XX @@
163
F(0xed24, LDE, RXE, Z, 0, m2_32u, new, f1, lde, 0, IF_AFP1)
164
/* LOAD ROUNDED */
165
F(0xb344, LEDBR, RRF_e, Z, 0, f2, new, e1, ledb, 0, IF_BFP)
166
- F(0xb345, LDXBR, RRF_e, Z, x2h, x2l, new, f1, ldxb, 0, IF_BFP)
167
- F(0xb346, LEXBR, RRF_e, Z, x2h, x2l, new, e1, lexb, 0, IF_BFP)
168
+ F(0xb345, LDXBR, RRF_e, Z, 0, x2, new, f1, ldxb, 0, IF_BFP)
169
+ F(0xb346, LEXBR, RRF_e, Z, 0, x2, new, e1, lexb, 0, IF_BFP)
170
171
/* LOAD MULTIPLE */
172
C(0x9800, LM, RS_a, Z, 0, a2, 0, 0, lm32, 0)
173
@@ -XXX,XX +XXX,XX @@
174
C(0xe384, MG, RXY_a, MIE2,r1p1_o, m2_64, r1_P, 0, muls128, 0)
175
F(0xb317, MEEBR, RRE, Z, e1, e2, new, e1, meeb, 0, IF_BFP)
176
F(0xb31c, MDBR, RRE, Z, f1, f2, new, f1, mdb, 0, IF_BFP)
177
- F(0xb34c, MXBR, RRE, Z, x2h, x2l, x1, x1, mxb, 0, IF_BFP)
178
+ F(0xb34c, MXBR, RRE, Z, x1, x2, new_x, x1, mxb, 0, IF_BFP)
179
F(0xb30c, MDEBR, RRE, Z, f1, e2, new, f1, mdeb, 0, IF_BFP)
180
F(0xb307, MXDBR, RRE, Z, 0, f2, x1, x1, mxdb, 0, IF_BFP)
181
F(0xed17, MEEB, RXE, Z, e1, m2_32u, new, e1, meeb, 0, IF_BFP)
182
@@ -XXX,XX +XXX,XX @@
183
/* SQUARE ROOT */
184
F(0xb314, SQEBR, RRE, Z, 0, e2, new, e1, sqeb, 0, IF_BFP)
185
F(0xb315, SQDBR, RRE, Z, 0, f2, new, f1, sqdb, 0, IF_BFP)
186
- F(0xb316, SQXBR, RRE, Z, x2h, x2l, new_x, x1, sqxb, 0, IF_BFP)
187
+ F(0xb316, SQXBR, RRE, Z, 0, x2, new_x, x1, sqxb, 0, IF_BFP)
188
F(0xed14, SQEB, RXE, Z, 0, m2_32u, new, e1, sqeb, 0, IF_BFP)
189
F(0xed15, SQDB, RXE, Z, 0, m2_64, new, f1, sqdb, 0, IF_BFP)
190
191
@@ -XXX,XX +XXX,XX @@
192
C(0xe319, SGF, RXY_a, Z, r1, m2_32s, r1, 0, sub, subs64)
193
F(0xb30b, SEBR, RRE, Z, e1, e2, new, e1, seb, f32, IF_BFP)
194
F(0xb31b, SDBR, RRE, Z, f1, f2, new, f1, sdb, f64, IF_BFP)
195
- F(0xb34b, SXBR, RRE, Z, x2h, x2l, x1, x1, sxb, f128, IF_BFP)
196
+ F(0xb34b, SXBR, RRE, Z, x1, x2, new_x, x1, sxb, f128, IF_BFP)
197
F(0xed0b, SEB, RXE, Z, e1, m2_32u, new, e1, seb, f32, IF_BFP)
198
F(0xed1b, SDB, RXE, Z, f1, m2_64, new, f1, sdb, f64, IF_BFP)
199
/* SUBTRACT HALFWORD */
200
@@ -XXX,XX +XXX,XX @@
201
/* TEST DATA CLASS */
202
F(0xed10, TCEB, RXE, Z, e1, a2, 0, 0, tceb, 0, IF_BFP)
203
F(0xed11, TCDB, RXE, Z, f1, a2, 0, 0, tcdb, 0, IF_BFP)
204
- F(0xed12, TCXB, RXE, Z, 0, a2, x1, 0, tcxb, 0, IF_BFP)
205
+ F(0xed12, TCXB, RXE, Z, x1, a2, 0, 0, tcxb, 0, IF_BFP)
206
207
/* TEST DECIMAL */
208
C(0xebc0, TP, RSL, E2, la1, 0, 0, 0, tp, 0)
209
diff --git a/target/s390x/tcg/fpu_helper.c b/target/s390x/tcg/fpu_helper.c
210
index XXXXXXX..XXXXXXX 100644
211
--- a/target/s390x/tcg/fpu_helper.c
212
+++ b/target/s390x/tcg/fpu_helper.c
213
@@ -XXX,XX +XXX,XX @@ static inline Int128 RET128(float128 f)
214
return int128_make128(f.low, f.high);
215
}
216
217
+static inline float128 ARG128(Int128 i)
218
+{
219
+ return make_float128(int128_gethi(i), int128_getlo(i));
220
+}
221
+
222
uint8_t s390_softfloat_exc_to_ieee(unsigned int exc)
223
{
224
uint8_t s390_exc = 0;
225
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(adb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
226
}
227
228
/* 128-bit FP addition */
229
-Int128 HELPER(axb)(CPUS390XState *env, uint64_t ah, uint64_t al,
230
- uint64_t bh, uint64_t bl)
231
+Int128 HELPER(axb)(CPUS390XState *env, Int128 a, Int128 b)
232
{
233
- float128 ret = float128_add(make_float128(ah, al),
234
- make_float128(bh, bl),
235
- &env->fpu_status);
236
+ float128 ret = float128_add(ARG128(a), ARG128(b), &env->fpu_status);
237
handle_exceptions(env, false, GETPC());
238
return RET128(ret);
239
}
240
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(sdb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
241
}
242
243
/* 128-bit FP subtraction */
244
-Int128 HELPER(sxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
245
- uint64_t bh, uint64_t bl)
246
+Int128 HELPER(sxb)(CPUS390XState *env, Int128 a, Int128 b)
247
{
248
- float128 ret = float128_sub(make_float128(ah, al),
249
- make_float128(bh, bl),
250
- &env->fpu_status);
251
+ float128 ret = float128_sub(ARG128(a), ARG128(b), &env->fpu_status);
252
handle_exceptions(env, false, GETPC());
253
return RET128(ret);
254
}
255
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(ddb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
256
}
257
258
/* 128-bit FP division */
259
-Int128 HELPER(dxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
260
- uint64_t bh, uint64_t bl)
261
+Int128 HELPER(dxb)(CPUS390XState *env, Int128 a, Int128 b)
262
{
263
- float128 ret = float128_div(make_float128(ah, al),
264
- make_float128(bh, bl),
265
- &env->fpu_status);
266
+ float128 ret = float128_div(ARG128(a), ARG128(b), &env->fpu_status);
267
handle_exceptions(env, false, GETPC());
268
return RET128(ret);
269
}
270
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(mdeb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
271
}
272
273
/* 128-bit FP multiplication */
274
-Int128 HELPER(mxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
275
- uint64_t bh, uint64_t bl)
276
+Int128 HELPER(mxb)(CPUS390XState *env, Int128 a, Int128 b)
277
{
278
- float128 ret = float128_mul(make_float128(ah, al),
279
- make_float128(bh, bl),
280
- &env->fpu_status);
281
+ float128 ret = float128_mul(ARG128(a), ARG128(b), &env->fpu_status);
282
handle_exceptions(env, false, GETPC());
283
return RET128(ret);
284
}
285
286
/* 128/64-bit FP multiplication */
287
-Int128 HELPER(mxdb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t f2)
288
+Int128 HELPER(mxdb)(CPUS390XState *env, Int128 a, uint64_t f2)
289
{
290
float128 ret = float64_to_float128(f2, &env->fpu_status);
291
- ret = float128_mul(make_float128(ah, al), ret, &env->fpu_status);
292
+ ret = float128_mul(ARG128(a), ret, &env->fpu_status);
293
handle_exceptions(env, false, GETPC());
294
return RET128(ret);
295
}
296
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(ldeb)(CPUS390XState *env, uint64_t f2)
297
}
298
299
/* convert 128-bit float to 64-bit float */
300
-uint64_t HELPER(ldxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
301
- uint32_t m34)
302
+uint64_t HELPER(ldxb)(CPUS390XState *env, Int128 a, uint32_t m34)
303
{
304
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
305
- float64 ret = float128_to_float64(make_float128(ah, al), &env->fpu_status);
306
+ float64 ret = float128_to_float64(ARG128(a), &env->fpu_status);
307
308
s390_restore_bfp_rounding_mode(env, old_mode);
309
handle_exceptions(env, xxc_from_m34(m34), GETPC());
310
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(ledb)(CPUS390XState *env, uint64_t f2, uint32_t m34)
311
}
312
313
/* convert 128-bit float to 32-bit float */
314
-uint64_t HELPER(lexb)(CPUS390XState *env, uint64_t ah, uint64_t al,
315
- uint32_t m34)
316
+uint64_t HELPER(lexb)(CPUS390XState *env, Int128 a, uint32_t m34)
317
{
318
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
319
- float32 ret = float128_to_float32(make_float128(ah, al), &env->fpu_status);
320
+ float32 ret = float128_to_float32(ARG128(a), &env->fpu_status);
321
322
s390_restore_bfp_rounding_mode(env, old_mode);
323
handle_exceptions(env, xxc_from_m34(m34), GETPC());
324
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(cdb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
325
}
326
327
/* 128-bit FP compare */
328
-uint32_t HELPER(cxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
329
- uint64_t bh, uint64_t bl)
330
+uint32_t HELPER(cxb)(CPUS390XState *env, Int128 a, Int128 b)
331
{
332
- FloatRelation cmp = float128_compare_quiet(make_float128(ah, al),
333
- make_float128(bh, bl),
334
+ FloatRelation cmp = float128_compare_quiet(ARG128(a), ARG128(b),
335
&env->fpu_status);
336
handle_exceptions(env, false, GETPC());
337
return float_comp_to_cc(env, cmp);
338
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(cgdb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
339
}
340
341
/* convert 128-bit float to 64-bit int */
342
-uint64_t HELPER(cgxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34)
343
+uint64_t HELPER(cgxb)(CPUS390XState *env, Int128 i2, uint32_t m34)
344
{
345
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
346
- float128 v2 = make_float128(h, l);
347
+ float128 v2 = ARG128(i2);
348
int64_t ret = float128_to_int64(v2, &env->fpu_status);
349
uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status);
350
351
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(cfdb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
352
}
353
354
/* convert 128-bit float to 32-bit int */
355
-uint64_t HELPER(cfxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34)
356
+uint64_t HELPER(cfxb)(CPUS390XState *env, Int128 i2, uint32_t m34)
357
{
358
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
359
- float128 v2 = make_float128(h, l);
360
+ float128 v2 = ARG128(i2);
361
int32_t ret = float128_to_int32(v2, &env->fpu_status);
362
uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status);
363
364
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(clgdb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
365
}
366
367
/* convert 128-bit float to 64-bit uint */
368
-uint64_t HELPER(clgxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34)
369
+uint64_t HELPER(clgxb)(CPUS390XState *env, Int128 i2, uint32_t m34)
370
{
371
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
372
- float128 v2 = make_float128(h, l);
373
+ float128 v2 = ARG128(i2);
374
uint64_t ret = float128_to_uint64(v2, &env->fpu_status);
375
uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status);
376
377
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(clfdb)(CPUS390XState *env, uint64_t v2, uint32_t m34)
378
}
379
380
/* convert 128-bit float to 32-bit uint */
381
-uint64_t HELPER(clfxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34)
382
+uint64_t HELPER(clfxb)(CPUS390XState *env, Int128 i2, uint32_t m34)
383
{
384
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
385
- float128 v2 = make_float128(h, l);
386
+ float128 v2 = ARG128(i2);
387
uint32_t ret = float128_to_uint32(v2, &env->fpu_status);
388
uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status);
389
390
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(fidb)(CPUS390XState *env, uint64_t f2, uint32_t m34)
391
}
392
393
/* round to integer 128-bit */
394
-Int128 HELPER(fixb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint32_t m34)
395
+Int128 HELPER(fixb)(CPUS390XState *env, Int128 a, uint32_t m34)
396
{
397
int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34));
398
- float128 ret = float128_round_to_int(make_float128(ah, al),
399
- &env->fpu_status);
400
+ float128 ret = float128_round_to_int(ARG128(a), &env->fpu_status);
401
402
s390_restore_bfp_rounding_mode(env, old_mode);
403
handle_exceptions(env, xxc_from_m34(m34), GETPC());
404
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(kdb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
405
}
406
407
/* 128-bit FP compare and signal */
408
-uint32_t HELPER(kxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
409
- uint64_t bh, uint64_t bl)
410
+uint32_t HELPER(kxb)(CPUS390XState *env, Int128 a, Int128 b)
411
{
412
- FloatRelation cmp = float128_compare(make_float128(ah, al),
413
- make_float128(bh, bl),
414
+ FloatRelation cmp = float128_compare(ARG128(a), ARG128(b),
415
&env->fpu_status);
416
handle_exceptions(env, false, GETPC());
417
return float_comp_to_cc(env, cmp);
418
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(tcdb)(CPUS390XState *env, uint64_t v1, uint64_t m2)
419
}
420
421
/* test data class 128-bit */
422
-uint32_t HELPER(tcxb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t m2)
423
+uint32_t HELPER(tcxb)(CPUS390XState *env, Int128 a, uint64_t m2)
424
{
425
- return (m2 & float128_dcmask(env, make_float128(ah, al))) != 0;
426
+ return (m2 & float128_dcmask(env, ARG128(a))) != 0;
427
}
428
429
/* square root 32-bit */
430
@@ -XXX,XX +XXX,XX @@ uint64_t HELPER(sqdb)(CPUS390XState *env, uint64_t f2)
431
}
432
433
/* square root 128-bit */
434
-Int128 HELPER(sqxb)(CPUS390XState *env, uint64_t ah, uint64_t al)
435
+Int128 HELPER(sqxb)(CPUS390XState *env, Int128 a)
436
{
437
- float128 ret = float128_sqrt(make_float128(ah, al), &env->fpu_status);
438
+ float128 ret = float128_sqrt(ARG128(a), &env->fpu_status);
439
handle_exceptions(env, false, GETPC());
440
return RET128(ret);
441
}
442
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
443
index XXXXXXX..XXXXXXX 100644
444
--- a/target/s390x/tcg/translate.c
445
+++ b/target/s390x/tcg/translate.c
446
@@ -XXX,XX +XXX,XX @@ static TCGv_i64 load_freg32_i64(int reg)
447
return r;
448
}
449
450
+static TCGv_i128 load_freg_128(int reg)
451
+{
452
+ TCGv_i64 h = load_freg(reg);
453
+ TCGv_i64 l = load_freg(reg + 2);
454
+ TCGv_i128 r = tcg_temp_new_i128();
455
+
456
+ tcg_gen_concat_i64_i128(r, l, h);
457
+ tcg_temp_free_i64(h);
458
+ tcg_temp_free_i64(l);
459
+ return r;
460
+}
461
+
462
static void store_reg(int reg, TCGv_i64 v)
463
{
464
tcg_gen_mov_i64(regs[reg], v);
465
@@ -XXX,XX +XXX,XX @@ typedef struct {
466
bool g_out, g_out2, g_in1, g_in2;
467
TCGv_i64 out, out2, in1, in2;
468
TCGv_i64 addr1;
469
- TCGv_i128 out_128;
470
+ TCGv_i128 out_128, in1_128, in2_128;
471
} DisasOps;
472
473
/* Instructions can place constraints on their operands, raising specification
474
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_adb(DisasContext *s, DisasOps *o)
475
476
static DisasJumpType op_axb(DisasContext *s, DisasOps *o)
477
{
478
- gen_helper_axb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2);
479
+ gen_helper_axb(o->out_128, cpu_env, o->in1_128, o->in2_128);
480
return DISAS_NEXT;
481
}
482
483
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cdb(DisasContext *s, DisasOps *o)
484
485
static DisasJumpType op_cxb(DisasContext *s, DisasOps *o)
486
{
487
- gen_helper_cxb(cc_op, cpu_env, o->out, o->out2, o->in1, o->in2);
488
+ gen_helper_cxb(cc_op, cpu_env, o->in1_128, o->in2_128);
489
set_cc_static(s);
490
return DISAS_NEXT;
491
}
492
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cfxb(DisasContext *s, DisasOps *o)
493
if (!m34) {
494
return DISAS_NORETURN;
495
}
496
- gen_helper_cfxb(o->out, cpu_env, o->in1, o->in2, m34);
497
+ gen_helper_cfxb(o->out, cpu_env, o->in2_128, m34);
498
tcg_temp_free_i32(m34);
499
set_cc_static(s);
500
return DISAS_NEXT;
501
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cgxb(DisasContext *s, DisasOps *o)
502
if (!m34) {
503
return DISAS_NORETURN;
504
}
505
- gen_helper_cgxb(o->out, cpu_env, o->in1, o->in2, m34);
506
+ gen_helper_cgxb(o->out, cpu_env, o->in2_128, m34);
507
tcg_temp_free_i32(m34);
508
set_cc_static(s);
509
return DISAS_NEXT;
510
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_clfxb(DisasContext *s, DisasOps *o)
511
if (!m34) {
512
return DISAS_NORETURN;
513
}
514
- gen_helper_clfxb(o->out, cpu_env, o->in1, o->in2, m34);
515
+ gen_helper_clfxb(o->out, cpu_env, o->in2_128, m34);
516
tcg_temp_free_i32(m34);
517
set_cc_static(s);
518
return DISAS_NEXT;
519
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_clgxb(DisasContext *s, DisasOps *o)
520
if (!m34) {
521
return DISAS_NORETURN;
522
}
523
- gen_helper_clgxb(o->out, cpu_env, o->in1, o->in2, m34);
524
+ gen_helper_clgxb(o->out, cpu_env, o->in2_128, m34);
525
tcg_temp_free_i32(m34);
526
set_cc_static(s);
527
return DISAS_NEXT;
528
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_ddb(DisasContext *s, DisasOps *o)
529
530
static DisasJumpType op_dxb(DisasContext *s, DisasOps *o)
531
{
532
- gen_helper_dxb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2);
533
+ gen_helper_dxb(o->out_128, cpu_env, o->in1_128, o->in2_128);
534
return DISAS_NEXT;
535
}
536
537
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_fixb(DisasContext *s, DisasOps *o)
538
if (!m34) {
539
return DISAS_NORETURN;
540
}
541
- gen_helper_fixb(o->out_128, cpu_env, o->in1, o->in2, m34);
542
+ gen_helper_fixb(o->out_128, cpu_env, o->in2_128, m34);
543
tcg_temp_free_i32(m34);
544
return DISAS_NEXT;
545
}
546
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_kdb(DisasContext *s, DisasOps *o)
547
548
static DisasJumpType op_kxb(DisasContext *s, DisasOps *o)
549
{
550
- gen_helper_kxb(cc_op, cpu_env, o->out, o->out2, o->in1, o->in2);
551
+ gen_helper_kxb(cc_op, cpu_env, o->in1_128, o->in2_128);
552
set_cc_static(s);
553
return DISAS_NEXT;
554
}
555
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_ldxb(DisasContext *s, DisasOps *o)
556
if (!m34) {
557
return DISAS_NORETURN;
558
}
559
- gen_helper_ldxb(o->out, cpu_env, o->in1, o->in2, m34);
560
+ gen_helper_ldxb(o->out, cpu_env, o->in2_128, m34);
561
tcg_temp_free_i32(m34);
562
return DISAS_NEXT;
563
}
564
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_lexb(DisasContext *s, DisasOps *o)
565
if (!m34) {
566
return DISAS_NORETURN;
567
}
568
- gen_helper_lexb(o->out, cpu_env, o->in1, o->in2, m34);
569
+ gen_helper_lexb(o->out, cpu_env, o->in2_128, m34);
570
tcg_temp_free_i32(m34);
571
return DISAS_NEXT;
572
}
573
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_mdb(DisasContext *s, DisasOps *o)
574
575
static DisasJumpType op_mxb(DisasContext *s, DisasOps *o)
576
{
577
- gen_helper_mxb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2);
578
+ gen_helper_mxb(o->out_128, cpu_env, o->in1_128, o->in2_128);
579
return DISAS_NEXT;
580
}
581
582
static DisasJumpType op_mxdb(DisasContext *s, DisasOps *o)
583
{
584
- gen_helper_mxdb(o->out_128, cpu_env, o->out, o->out2, o->in2);
585
+ gen_helper_mxdb(o->out_128, cpu_env, o->in1_128, o->in2);
586
return DISAS_NEXT;
587
}
588
589
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_sdb(DisasContext *s, DisasOps *o)
590
591
static DisasJumpType op_sxb(DisasContext *s, DisasOps *o)
592
{
593
- gen_helper_sxb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2);
594
+ gen_helper_sxb(o->out_128, cpu_env, o->in1_128, o->in2_128);
595
return DISAS_NEXT;
596
}
597
598
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_sqdb(DisasContext *s, DisasOps *o)
599
600
static DisasJumpType op_sqxb(DisasContext *s, DisasOps *o)
601
{
602
- gen_helper_sqxb(o->out_128, cpu_env, o->in1, o->in2);
603
+ gen_helper_sqxb(o->out_128, cpu_env, o->in2_128);
604
return DISAS_NEXT;
605
}
606
607
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_tcdb(DisasContext *s, DisasOps *o)
608
609
static DisasJumpType op_tcxb(DisasContext *s, DisasOps *o)
610
{
611
- gen_helper_tcxb(cc_op, cpu_env, o->out, o->out2, o->in2);
612
+ gen_helper_tcxb(cc_op, cpu_env, o->in1_128, o->in2);
613
set_cc_static(s);
614
return DISAS_NEXT;
615
}
616
@@ -XXX,XX +XXX,XX @@ static void prep_new_P(DisasContext *s, DisasOps *o)
617
618
static void prep_new_x(DisasContext *s, DisasOps *o)
619
{
620
- o->out = tcg_temp_new_i64();
621
- o->out2 = tcg_temp_new_i64();
622
o->out_128 = tcg_temp_new_i128();
623
}
624
#define SPEC_prep_new_x 0
625
@@ -XXX,XX +XXX,XX @@ static void prep_r1_P(DisasContext *s, DisasOps *o)
626
627
static void prep_x1(DisasContext *s, DisasOps *o)
628
{
629
- o->out = load_freg(get_field(s, r1));
630
- o->out2 = load_freg(get_field(s, r1) + 2);
631
- o->out_128 = tcg_temp_new_i128();
632
- tcg_gen_concat_i64_i128(o->out_128, o->out2, o->out);
633
+ o->out_128 = load_freg_128(get_field(s, r1));
634
}
635
#define SPEC_prep_x1 SPEC_r1_f128
636
637
@@ -XXX,XX +XXX,XX @@ static void wout_x1(DisasContext *s, DisasOps *o)
638
{
639
int f1 = get_field(s, r1);
640
641
+ /* Split out_128 into out+out2 for cout_f128. */
642
+ tcg_debug_assert(o->out == NULL);
643
+ o->out = tcg_temp_new_i64();
644
+ o->out2 = tcg_temp_new_i64();
645
+
646
tcg_gen_extr_i128_i64(o->out2, o->out, o->out_128);
647
store_freg(f1, o->out);
648
store_freg(f1 + 2, o->out2);
649
@@ -XXX,XX +XXX,XX @@ static void in1_f1(DisasContext *s, DisasOps *o)
650
}
651
#define SPEC_in1_f1 0
652
653
+static void in1_x1(DisasContext *s, DisasOps *o)
654
+{
655
+ o->in1_128 = load_freg_128(get_field(s, r1));
656
+}
657
+#define SPEC_in1_x1 SPEC_r1_f128
658
+
659
/* Load the high double word of an extended (128-bit) format FP number */
660
static void in1_x2h(DisasContext *s, DisasOps *o)
661
{
662
@@ -XXX,XX +XXX,XX @@ static void in2_f2(DisasContext *s, DisasOps *o)
663
}
664
#define SPEC_in2_f2 0
665
666
+static void in2_x2(DisasContext *s, DisasOps *o)
667
+{
668
+ o->in2_128 = load_freg_128(get_field(s, r2));
669
+}
670
+#define SPEC_in2_x2 SPEC_r2_f128
671
+
672
/* Load the low double word of an extended (128-bit) format FP number */
673
static void in2_x2l(DisasContext *s, DisasOps *o)
674
{
675
@@ -XXX,XX +XXX,XX @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s)
676
if (o.out_128) {
677
tcg_temp_free_i128(o.out_128);
678
}
679
+ if (o.in1_128) {
680
+ tcg_temp_free_i128(o.in1_128);
681
+ }
682
+ if (o.in2_128) {
683
+ tcg_temp_free_i128(o.in2_128);
684
+ }
685
/* io should be the last instruction in tb when icount is enabled */
686
if (unlikely(icount && ret == DISAS_NEXT)) {
687
ret = DISAS_TOO_MANY;
688
--
689
2.34.1
690
691
diff view generated by jsdifflib
New patch
1
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
target/s390x/helper.h | 2 --
5
target/s390x/tcg/insn-data.h.inc | 2 +-
6
target/s390x/tcg/mem_helper.c | 52 ------------------------------
7
target/s390x/tcg/translate.c | 55 +++++++++++++++++++-------------
8
4 files changed, 33 insertions(+), 78 deletions(-)
1
9
10
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/target/s390x/helper.h
13
+++ b/target/s390x/helper.h
14
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_3(cxgb, i128, env, s64, i32)
15
DEF_HELPER_3(celgb, i64, env, i64, i32)
16
DEF_HELPER_3(cdlgb, i64, env, i64, i32)
17
DEF_HELPER_3(cxlgb, i128, env, i64, i32)
18
-DEF_HELPER_4(cdsg, void, env, i64, i32, i32)
19
-DEF_HELPER_4(cdsg_parallel, void, env, i64, i32, i32)
20
DEF_HELPER_4(csst, i32, env, i32, i64, i64)
21
DEF_HELPER_4(csst_parallel, i32, env, i32, i64, i64)
22
DEF_HELPER_FLAGS_3(aeb, TCG_CALL_NO_WG, i64, env, i64, i64)
23
diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
24
index XXXXXXX..XXXXXXX 100644
25
--- a/target/s390x/tcg/insn-data.h.inc
26
+++ b/target/s390x/tcg/insn-data.h.inc
27
@@ -XXX,XX +XXX,XX @@
28
/* COMPARE DOUBLE AND SWAP */
29
D(0xbb00, CDS, RS_a, Z, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEUQ)
30
D(0xeb31, CDSY, RSY_a, LD, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEUQ)
31
- C(0xeb3e, CDSG, RSY_a, Z, 0, 0, 0, 0, cdsg, 0)
32
+ C(0xeb3e, CDSG, RSY_a, Z, la2, r3_D64, 0, r1_D64, cdsg, 0)
33
/* COMPARE AND SWAP AND STORE */
34
C(0xc802, CSST, SSF, CASS, la1, a2, 0, 0, csst, 0)
35
36
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/target/s390x/tcg/mem_helper.c
39
+++ b/target/s390x/tcg/mem_helper.c
40
@@ -XXX,XX +XXX,XX @@ uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
41
return cc;
42
}
43
44
-void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
45
- uint32_t r1, uint32_t r3)
46
-{
47
- uintptr_t ra = GETPC();
48
- Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
49
- Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
50
- Int128 oldv;
51
- uint64_t oldh, oldl;
52
- bool fail;
53
-
54
- check_alignment(env, addr, 16, ra);
55
-
56
- oldh = cpu_ldq_data_ra(env, addr + 0, ra);
57
- oldl = cpu_ldq_data_ra(env, addr + 8, ra);
58
-
59
- oldv = int128_make128(oldl, oldh);
60
- fail = !int128_eq(oldv, cmpv);
61
- if (fail) {
62
- newv = oldv;
63
- }
64
-
65
- cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
66
- cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
67
-
68
- env->cc_op = fail;
69
- env->regs[r1] = int128_gethi(oldv);
70
- env->regs[r1 + 1] = int128_getlo(oldv);
71
-}
72
-
73
-void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
74
- uint32_t r1, uint32_t r3)
75
-{
76
- uintptr_t ra = GETPC();
77
- Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
78
- Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
79
- int mem_idx;
80
- MemOpIdx oi;
81
- Int128 oldv;
82
- bool fail;
83
-
84
- assert(HAVE_CMPXCHG128);
85
-
86
- mem_idx = cpu_mmu_index(env, false);
87
- oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
88
- oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
89
- fail = !int128_eq(oldv, cmpv);
90
-
91
- env->cc_op = fail;
92
- env->regs[r1] = int128_gethi(oldv);
93
- env->regs[r1 + 1] = int128_getlo(oldv);
94
-}
95
-
96
static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
97
uint64_t a2, bool parallel)
98
{
99
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
100
index XXXXXXX..XXXXXXX 100644
101
--- a/target/s390x/tcg/translate.c
102
+++ b/target/s390x/tcg/translate.c
103
@@ -XXX,XX +XXX,XX @@ static DisasJumpType op_cs(DisasContext *s, DisasOps *o)
104
static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o)
105
{
106
int r1 = get_field(s, r1);
107
- int r3 = get_field(s, r3);
108
- int d2 = get_field(s, d2);
109
- int b2 = get_field(s, b2);
110
- DisasJumpType ret = DISAS_NEXT;
111
- TCGv_i64 addr;
112
- TCGv_i32 t_r1, t_r3;
113
114
- /* Note that R1:R1+1 = expected value and R3:R3+1 = new value. */
115
- addr = get_address(s, 0, b2, d2);
116
- t_r1 = tcg_const_i32(r1);
117
- t_r3 = tcg_const_i32(r3);
118
- if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
119
- gen_helper_cdsg(cpu_env, addr, t_r1, t_r3);
120
- } else if (HAVE_CMPXCHG128) {
121
- gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3);
122
- } else {
123
- gen_helper_exit_atomic(cpu_env);
124
- ret = DISAS_NORETURN;
125
- }
126
- tcg_temp_free_i64(addr);
127
- tcg_temp_free_i32(t_r1);
128
- tcg_temp_free_i32(t_r3);
129
+ o->out_128 = tcg_temp_new_i128();
130
+ tcg_gen_concat_i64_i128(o->out_128, regs[r1 + 1], regs[r1]);
131
132
- set_cc_static(s);
133
- return ret;
134
+ /* Note out (R1:R1+1) = expected value and in2 (R3:R3+1) = new value. */
135
+ tcg_gen_atomic_cmpxchg_i128(o->out_128, o->addr1, o->out_128, o->in2_128,
136
+ get_mem_index(s), MO_BE | MO_128 | MO_ALIGN);
137
+
138
+ /*
139
+ * Extract result into cc_dst:cc_src, compare vs the expected value
140
+ * in the as yet unmodified input registers, then update CC_OP.
141
+ */
142
+ tcg_gen_extr_i128_i64(cc_src, cc_dst, o->out_128);
143
+ tcg_gen_xor_i64(cc_dst, cc_dst, regs[r1]);
144
+ tcg_gen_xor_i64(cc_src, cc_src, regs[r1 + 1]);
145
+ tcg_gen_or_i64(cc_dst, cc_dst, cc_src);
146
+ set_cc_nz_u64(s, cc_dst);
147
+
148
+ return DISAS_NEXT;
149
}
150
151
static DisasJumpType op_csst(DisasContext *s, DisasOps *o)
152
@@ -XXX,XX +XXX,XX @@ static void wout_r1_D32(DisasContext *s, DisasOps *o)
153
}
154
#define SPEC_wout_r1_D32 SPEC_r1_even
155
156
+static void wout_r1_D64(DisasContext *s, DisasOps *o)
157
+{
158
+ int r1 = get_field(s, r1);
159
+ tcg_gen_extr_i128_i64(regs[r1 + 1], regs[r1], o->out_128);
160
+}
161
+#define SPEC_wout_r1_D64 SPEC_r1_even
162
+
163
static void wout_r3_P32(DisasContext *s, DisasOps *o)
164
{
165
int r3 = get_field(s, r3);
166
@@ -XXX,XX +XXX,XX @@ static void in2_r3(DisasContext *s, DisasOps *o)
167
}
168
#define SPEC_in2_r3 0
169
170
+static void in2_r3_D64(DisasContext *s, DisasOps *o)
171
+{
172
+ int r3 = get_field(s, r3);
173
+ o->in2_128 = tcg_temp_new_i128();
174
+ tcg_gen_concat_i64_i128(o->in2_128, regs[r3 + 1], regs[r3]);
175
+}
176
+#define SPEC_in2_r3_D64 SPEC_r3_even
177
+
178
static void in2_r3_sr32(DisasContext *s, DisasOps *o)
179
{
180
o->in2 = tcg_temp_new_i64();
181
--
182
2.34.1
diff view generated by jsdifflib
New patch
1
This case is trivial to implement inline.
1
2
3
Reviewed-by: David Hildenbrand <david@redhat.com>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
target/s390x/tcg/translate.c | 3 +++
7
1 file changed, 3 insertions(+)
8
9
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/target/s390x/tcg/translate.c
12
+++ b/target/s390x/tcg/translate.c
13
@@ -XXX,XX +XXX,XX @@ static void gen_op_calc_cc(DisasContext *s)
14
/* env->cc_op already is the cc value */
15
break;
16
case CC_OP_NZ:
17
+ tcg_gen_setcondi_i64(TCG_COND_NE, cc_dst, cc_dst, 0);
18
+ tcg_gen_extrl_i64_i32(cc_op, cc_dst);
19
+ break;
20
case CC_OP_ABS_64:
21
case CC_OP_NABS_64:
22
case CC_OP_ABS_32:
23
--
24
2.34.1
diff view generated by jsdifflib
New patch
1
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
target/i386/tcg/translate.c | 48 ++++++++++++++++++++++++-------------
5
1 file changed, 31 insertions(+), 17 deletions(-)
1
6
7
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/target/i386/tcg/translate.c
10
+++ b/target/i386/tcg/translate.c
11
@@ -XXX,XX +XXX,XX @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
12
#include "emit.c.inc"
13
#include "decode-new.c.inc"
14
15
+static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm)
16
+{
17
+ gen_lea_modrm(env, s, modrm);
18
+
19
+ if ((s->prefix & PREFIX_LOCK) &&
20
+ (tb_cflags(s->base.tb) & CF_PARALLEL)) {
21
+ gen_helper_cmpxchg8b(cpu_env, s->A0);
22
+ } else {
23
+ gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
24
+ }
25
+ set_cc_op(s, CC_OP_EFLAGS);
26
+}
27
+
28
+#ifdef TARGET_X86_64
29
+static void gen_cmpxchg16b(DisasContext *s, CPUX86State *env, int modrm)
30
+{
31
+ gen_lea_modrm(env, s, modrm);
32
+
33
+ if ((s->prefix & PREFIX_LOCK) &&
34
+ (tb_cflags(s->base.tb) & CF_PARALLEL)) {
35
+ gen_helper_cmpxchg16b(cpu_env, s->A0);
36
+ } else {
37
+ gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
38
+ }
39
+ set_cc_op(s, CC_OP_EFLAGS);
40
+}
41
+#endif
42
+
43
/* convert one instruction. s->base.is_jmp is set if the translation must
44
be stopped. Return the next pc value */
45
static bool disas_insn(DisasContext *s, CPUState *cpu)
46
@@ -XXX,XX +XXX,XX @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
47
if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
48
goto illegal_op;
49
}
50
- gen_lea_modrm(env, s, modrm);
51
- if ((s->prefix & PREFIX_LOCK) &&
52
- (tb_cflags(s->base.tb) & CF_PARALLEL)) {
53
- gen_helper_cmpxchg16b(cpu_env, s->A0);
54
- } else {
55
- gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
56
- }
57
- set_cc_op(s, CC_OP_EFLAGS);
58
+ gen_cmpxchg16b(s, env, modrm);
59
break;
60
}
61
-#endif
62
+#endif
63
if (!(s->cpuid_features & CPUID_CX8)) {
64
goto illegal_op;
65
}
66
- gen_lea_modrm(env, s, modrm);
67
- if ((s->prefix & PREFIX_LOCK) &&
68
- (tb_cflags(s->base.tb) & CF_PARALLEL)) {
69
- gen_helper_cmpxchg8b(cpu_env, s->A0);
70
- } else {
71
- gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
72
- }
73
- set_cc_op(s, CC_OP_EFLAGS);
74
+ gen_cmpxchg8b(s, env, modrm);
75
break;
76
77
case 7: /* RDSEED */
78
--
79
2.34.1
80
81
diff view generated by jsdifflib
New patch
1
Use tcg_gen_atomic_cmpxchg_i64 for the atomic case,
2
and tcg_gen_nonatomic_cmpxchg_i64 otherwise.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
target/i386/helper.h | 2 --
9
target/i386/tcg/mem_helper.c | 57 ------------------------------------
10
target/i386/tcg/translate.c | 54 ++++++++++++++++++++++++++++++----
11
3 files changed, 49 insertions(+), 64 deletions(-)
12
13
diff --git a/target/i386/helper.h b/target/i386/helper.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/target/i386/helper.h
16
+++ b/target/i386/helper.h
17
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_1(rsm, void, env)
18
#endif /* !CONFIG_USER_ONLY */
19
20
DEF_HELPER_2(into, void, env, int)
21
-DEF_HELPER_2(cmpxchg8b_unlocked, void, env, tl)
22
-DEF_HELPER_2(cmpxchg8b, void, env, tl)
23
#ifdef TARGET_X86_64
24
DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl)
25
DEF_HELPER_2(cmpxchg16b, void, env, tl)
26
diff --git a/target/i386/tcg/mem_helper.c b/target/i386/tcg/mem_helper.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/target/i386/tcg/mem_helper.c
29
+++ b/target/i386/tcg/mem_helper.c
30
@@ -XXX,XX +XXX,XX @@
31
#include "tcg/tcg.h"
32
#include "helper-tcg.h"
33
34
-void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0)
35
-{
36
- uintptr_t ra = GETPC();
37
- uint64_t oldv, cmpv, newv;
38
- int eflags;
39
-
40
- eflags = cpu_cc_compute_all(env, CC_OP);
41
-
42
- cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]);
43
- newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]);
44
-
45
- oldv = cpu_ldq_data_ra(env, a0, ra);
46
- newv = (cmpv == oldv ? newv : oldv);
47
- /* always do the store */
48
- cpu_stq_data_ra(env, a0, newv, ra);
49
-
50
- if (oldv == cmpv) {
51
- eflags |= CC_Z;
52
- } else {
53
- env->regs[R_EAX] = (uint32_t)oldv;
54
- env->regs[R_EDX] = (uint32_t)(oldv >> 32);
55
- eflags &= ~CC_Z;
56
- }
57
- CC_SRC = eflags;
58
-}
59
-
60
-void helper_cmpxchg8b(CPUX86State *env, target_ulong a0)
61
-{
62
-#ifdef CONFIG_ATOMIC64
63
- uint64_t oldv, cmpv, newv;
64
- int eflags;
65
-
66
- eflags = cpu_cc_compute_all(env, CC_OP);
67
-
68
- cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]);
69
- newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]);
70
-
71
- {
72
- uintptr_t ra = GETPC();
73
- int mem_idx = cpu_mmu_index(env, false);
74
- MemOpIdx oi = make_memop_idx(MO_TEUQ, mem_idx);
75
- oldv = cpu_atomic_cmpxchgq_le_mmu(env, a0, cmpv, newv, oi, ra);
76
- }
77
-
78
- if (oldv == cmpv) {
79
- eflags |= CC_Z;
80
- } else {
81
- env->regs[R_EAX] = (uint32_t)oldv;
82
- env->regs[R_EDX] = (uint32_t)(oldv >> 32);
83
- eflags &= ~CC_Z;
84
- }
85
- CC_SRC = eflags;
86
-#else
87
- cpu_loop_exit_atomic(env_cpu(env), GETPC());
88
-#endif /* CONFIG_ATOMIC64 */
89
-}
90
-
91
#ifdef TARGET_X86_64
92
void helper_cmpxchg16b_unlocked(CPUX86State *env, target_ulong a0)
93
{
94
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
95
index XXXXXXX..XXXXXXX 100644
96
--- a/target/i386/tcg/translate.c
97
+++ b/target/i386/tcg/translate.c
98
@@ -XXX,XX +XXX,XX @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
99
100
static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm)
101
{
102
+ TCGv_i64 cmp, val, old;
103
+ TCGv Z;
104
+
105
gen_lea_modrm(env, s, modrm);
106
107
- if ((s->prefix & PREFIX_LOCK) &&
108
- (tb_cflags(s->base.tb) & CF_PARALLEL)) {
109
- gen_helper_cmpxchg8b(cpu_env, s->A0);
110
+ cmp = tcg_temp_new_i64();
111
+ val = tcg_temp_new_i64();
112
+ old = tcg_temp_new_i64();
113
+
114
+ /* Construct the comparison values from the register pair. */
115
+ tcg_gen_concat_tl_i64(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
116
+ tcg_gen_concat_tl_i64(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
117
+
118
+ /* Only require atomic with LOCK; non-parallel handled in generator. */
119
+ if (s->prefix & PREFIX_LOCK) {
120
+ tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ);
121
} else {
122
- gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
123
+ tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val,
124
+ s->mem_index, MO_TEUQ);
125
}
126
- set_cc_op(s, CC_OP_EFLAGS);
127
+ tcg_temp_free_i64(val);
128
+
129
+ /* Set Z to the required value of the Z flag (1 if old == cmp). */
130
+ tcg_gen_setcond_i64(TCG_COND_EQ, cmp, old, cmp);
131
+ Z = tcg_temp_new();
132
+ tcg_gen_trunc_i64_tl(Z, cmp);
133
+ tcg_temp_free_i64(cmp);
134
+
135
+ /*
136
+ * Extract the result values for the register pair.
137
+ * For 32-bit, we may do this unconditionally, because on success (Z=1),
138
+ * the old value matches the previous value in EDX:EAX. For x86_64,
139
+ * the store must be conditional, because we must leave the source
140
+ * registers unchanged on success, and zero-extend the writeback
141
+ * on failure (Z=0).
142
+ */
143
+ if (TARGET_LONG_BITS == 32) {
144
+ tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], old);
145
+ } else {
146
+ TCGv zero = tcg_constant_tl(0);
147
+
148
+ tcg_gen_extr_i64_tl(s->T0, s->T1, old);
149
+ tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], Z, zero,
150
+ s->T0, cpu_regs[R_EAX]);
151
+ tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EDX], Z, zero,
152
+ s->T1, cpu_regs[R_EDX]);
153
+ }
154
+ tcg_temp_free_i64(old);
155
+
156
+ /* Update Z. */
157
+ gen_compute_eflags(s);
158
+ tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, Z, ctz32(CC_Z), 1);
159
+ tcg_temp_free(Z);
160
}
161
162
#ifdef TARGET_X86_64
163
--
164
2.34.1
165
166
diff view generated by jsdifflib
New patch
1
Use tcg_gen_atomic_cmpxchg_i128 for the atomic case,
2
and tcg_gen_nonatomic_cmpxchg_i128 otherwise.
1
3
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
target/i386/helper.h | 4 ---
8
target/i386/tcg/mem_helper.c | 69 ------------------------------------
9
target/i386/tcg/translate.c | 44 ++++++++++++++++++++---
10
3 files changed, 39 insertions(+), 78 deletions(-)
11
12
diff --git a/target/i386/helper.h b/target/i386/helper.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/target/i386/helper.h
15
+++ b/target/i386/helper.h
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_1(rsm, void, env)
17
#endif /* !CONFIG_USER_ONLY */
18
19
DEF_HELPER_2(into, void, env, int)
20
-#ifdef TARGET_X86_64
21
-DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl)
22
-DEF_HELPER_2(cmpxchg16b, void, env, tl)
23
-#endif
24
DEF_HELPER_FLAGS_1(single_step, TCG_CALL_NO_WG, noreturn, env)
25
DEF_HELPER_1(rechecking_single_step, void, env)
26
DEF_HELPER_1(cpuid, void, env)
27
diff --git a/target/i386/tcg/mem_helper.c b/target/i386/tcg/mem_helper.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/target/i386/tcg/mem_helper.c
30
+++ b/target/i386/tcg/mem_helper.c
31
@@ -XXX,XX +XXX,XX @@
32
#include "tcg/tcg.h"
33
#include "helper-tcg.h"
34
35
-#ifdef TARGET_X86_64
36
-void helper_cmpxchg16b_unlocked(CPUX86State *env, target_ulong a0)
37
-{
38
- uintptr_t ra = GETPC();
39
- Int128 oldv, cmpv, newv;
40
- uint64_t o0, o1;
41
- int eflags;
42
- bool success;
43
-
44
- if ((a0 & 0xf) != 0) {
45
- raise_exception_ra(env, EXCP0D_GPF, GETPC());
46
- }
47
- eflags = cpu_cc_compute_all(env, CC_OP);
48
-
49
- cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
50
- newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]);
51
-
52
- o0 = cpu_ldq_data_ra(env, a0 + 0, ra);
53
- o1 = cpu_ldq_data_ra(env, a0 + 8, ra);
54
-
55
- oldv = int128_make128(o0, o1);
56
- success = int128_eq(oldv, cmpv);
57
- if (!success) {
58
- newv = oldv;
59
- }
60
-
61
- cpu_stq_data_ra(env, a0 + 0, int128_getlo(newv), ra);
62
- cpu_stq_data_ra(env, a0 + 8, int128_gethi(newv), ra);
63
-
64
- if (success) {
65
- eflags |= CC_Z;
66
- } else {
67
- env->regs[R_EAX] = int128_getlo(oldv);
68
- env->regs[R_EDX] = int128_gethi(oldv);
69
- eflags &= ~CC_Z;
70
- }
71
- CC_SRC = eflags;
72
-}
73
-
74
-void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
75
-{
76
- uintptr_t ra = GETPC();
77
-
78
- if ((a0 & 0xf) != 0) {
79
- raise_exception_ra(env, EXCP0D_GPF, ra);
80
- } else if (HAVE_CMPXCHG128) {
81
- int eflags = cpu_cc_compute_all(env, CC_OP);
82
-
83
- Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
84
- Int128 newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]);
85
-
86
- int mem_idx = cpu_mmu_index(env, false);
87
- MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
88
- Int128 oldv = cpu_atomic_cmpxchgo_le_mmu(env, a0, cmpv, newv, oi, ra);
89
-
90
- if (int128_eq(oldv, cmpv)) {
91
- eflags |= CC_Z;
92
- } else {
93
- env->regs[R_EAX] = int128_getlo(oldv);
94
- env->regs[R_EDX] = int128_gethi(oldv);
95
- eflags &= ~CC_Z;
96
- }
97
- CC_SRC = eflags;
98
- } else {
99
- cpu_loop_exit_atomic(env_cpu(env), ra);
100
- }
101
-}
102
-#endif
103
-
104
void helper_boundw(CPUX86State *env, target_ulong a0, int v)
105
{
106
int low, high;
107
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
108
index XXXXXXX..XXXXXXX 100644
109
--- a/target/i386/tcg/translate.c
110
+++ b/target/i386/tcg/translate.c
111
@@ -XXX,XX +XXX,XX @@ static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm)
112
#ifdef TARGET_X86_64
113
static void gen_cmpxchg16b(DisasContext *s, CPUX86State *env, int modrm)
114
{
115
+ MemOp mop = MO_TE | MO_128 | MO_ALIGN;
116
+ TCGv_i64 t0, t1;
117
+ TCGv_i128 cmp, val;
118
+
119
gen_lea_modrm(env, s, modrm);
120
121
- if ((s->prefix & PREFIX_LOCK) &&
122
- (tb_cflags(s->base.tb) & CF_PARALLEL)) {
123
- gen_helper_cmpxchg16b(cpu_env, s->A0);
124
+ cmp = tcg_temp_new_i128();
125
+ val = tcg_temp_new_i128();
126
+ tcg_gen_concat_i64_i128(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]);
127
+ tcg_gen_concat_i64_i128(val, cpu_regs[R_EBX], cpu_regs[R_ECX]);
128
+
129
+ /* Only require atomic with LOCK; non-parallel handled in generator. */
130
+ if (s->prefix & PREFIX_LOCK) {
131
+ tcg_gen_atomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
132
} else {
133
- gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
134
+ tcg_gen_nonatomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop);
135
}
136
- set_cc_op(s, CC_OP_EFLAGS);
137
+
138
+ tcg_gen_extr_i128_i64(s->T0, s->T1, val);
139
+ tcg_temp_free_i128(cmp);
140
+ tcg_temp_free_i128(val);
141
+
142
+ /* Determine success after the fact. */
143
+ t0 = tcg_temp_new_i64();
144
+ t1 = tcg_temp_new_i64();
145
+ tcg_gen_xor_i64(t0, s->T0, cpu_regs[R_EAX]);
146
+ tcg_gen_xor_i64(t1, s->T1, cpu_regs[R_EDX]);
147
+ tcg_gen_or_i64(t0, t0, t1);
148
+ tcg_temp_free_i64(t1);
149
+
150
+ /* Update Z. */
151
+ gen_compute_eflags(s);
152
+ tcg_gen_setcondi_i64(TCG_COND_EQ, t0, t0, 0);
153
+ tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, t0, ctz32(CC_Z), 1);
154
+ tcg_temp_free_i64(t0);
155
+
156
+ /*
157
+ * Extract the result values for the register pair. We may do this
158
+ * unconditionally, because on success (Z=1), the old value matches
159
+ * the previous value in RDX:RAX.
160
+ */
161
+ tcg_gen_mov_i64(cpu_regs[R_EAX], s->T0);
162
+ tcg_gen_mov_i64(cpu_regs[R_EDX], s->T1);
163
}
164
#endif
165
166
--
167
2.34.1
168
169
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
'offset' should be bits [23:5] of LDR instruction, rather than [4:0].
2
2
3
This header only defines the tcg_allowed variable and the tcg_enabled()
3
Fixes: d59d83a1c388 ("tcg/aarch64: Reorg goto_tb implementation")
4
function - which are not required in many files that include this
4
Reviewed-by: Zenghui Yu <yuzenghui@huawei.com>
5
header. Drop the #include statement there.
5
Reported-by: Zenghui Yu <yuzenghui@huawei.com>
6
7
Signed-off-by: Thomas Huth <thuth@redhat.com>
8
Reviewed-by: Markus Armbruster <armbru@redhat.com>
9
Message-Id: <20220315144107.1012530-1-thuth@redhat.com>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
7
---
12
accel/tcg/hmp.c | 1 -
8
tcg/aarch64/tcg-target.c.inc | 2 +-
13
accel/tcg/tcg-accel-ops-icount.c | 1 -
9
1 file changed, 1 insertion(+), 1 deletion(-)
14
bsd-user/main.c | 1 -
15
hw/virtio/vhost.c | 1 -
16
linux-user/main.c | 1 -
17
monitor/misc.c | 1 -
18
target/arm/helper.c | 1 -
19
target/s390x/cpu_models_sysemu.c | 1 -
20
target/s390x/helper.c | 1 -
21
9 files changed, 9 deletions(-)
22
10
23
diff --git a/accel/tcg/hmp.c b/accel/tcg/hmp.c
11
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
24
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
25
--- a/accel/tcg/hmp.c
13
--- a/tcg/aarch64/tcg-target.c.inc
26
+++ b/accel/tcg/hmp.c
14
+++ b/tcg/aarch64/tcg-target.c.inc
27
@@ -XXX,XX +XXX,XX @@
15
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
28
#include "qapi/qapi-commands-machine.h"
16
ptrdiff_t i_offset = i_addr - jmp_rx;
29
#include "exec/exec-all.h"
17
30
#include "monitor/monitor.h"
18
/* Note that we asserted this in range in tcg_out_goto_tb. */
31
-#include "sysemu/tcg.h"
19
- insn = deposit32(I3305_LDR | TCG_REG_TMP, 0, 5, i_offset >> 2);
32
20
+ insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
33
static void hmp_tcg_register(void)
21
}
34
{
22
qatomic_set((uint32_t *)jmp_rw, insn);
35
diff --git a/accel/tcg/tcg-accel-ops-icount.c b/accel/tcg/tcg-accel-ops-icount.c
23
flush_idcache_range(jmp_rx, jmp_rw, 4);
36
index XXXXXXX..XXXXXXX 100644
37
--- a/accel/tcg/tcg-accel-ops-icount.c
38
+++ b/accel/tcg/tcg-accel-ops-icount.c
39
@@ -XXX,XX +XXX,XX @@
40
*/
41
42
#include "qemu/osdep.h"
43
-#include "sysemu/tcg.h"
44
#include "sysemu/replay.h"
45
#include "sysemu/cpu-timers.h"
46
#include "qemu/main-loop.h"
47
diff --git a/bsd-user/main.c b/bsd-user/main.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/bsd-user/main.c
50
+++ b/bsd-user/main.c
51
@@ -XXX,XX +XXX,XX @@
52
#include "qemu-common.h"
53
#include "qemu/units.h"
54
#include "qemu/accel.h"
55
-#include "sysemu/tcg.h"
56
#include "qemu-version.h"
57
#include <machine/trap.h>
58
59
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
60
index XXXXXXX..XXXXXXX 100644
61
--- a/hw/virtio/vhost.c
62
+++ b/hw/virtio/vhost.c
63
@@ -XXX,XX +XXX,XX @@
64
#include "migration/blocker.h"
65
#include "migration/qemu-file-types.h"
66
#include "sysemu/dma.h"
67
-#include "sysemu/tcg.h"
68
#include "trace.h"
69
70
/* enabled until disconnected backend stabilizes */
71
diff --git a/linux-user/main.c b/linux-user/main.c
72
index XXXXXXX..XXXXXXX 100644
73
--- a/linux-user/main.c
74
+++ b/linux-user/main.c
75
@@ -XXX,XX +XXX,XX @@
76
#include "qemu-common.h"
77
#include "qemu/units.h"
78
#include "qemu/accel.h"
79
-#include "sysemu/tcg.h"
80
#include "qemu-version.h"
81
#include <sys/syscall.h>
82
#include <sys/resource.h>
83
diff --git a/monitor/misc.c b/monitor/misc.c
84
index XXXXXXX..XXXXXXX 100644
85
--- a/monitor/misc.c
86
+++ b/monitor/misc.c
87
@@ -XXX,XX +XXX,XX @@
88
#include "qapi/util.h"
89
#include "sysemu/blockdev.h"
90
#include "sysemu/sysemu.h"
91
-#include "sysemu/tcg.h"
92
#include "sysemu/tpm.h"
93
#include "qapi/qmp/qdict.h"
94
#include "qapi/qmp/qerror.h"
95
diff --git a/target/arm/helper.c b/target/arm/helper.c
96
index XXXXXXX..XXXXXXX 100644
97
--- a/target/arm/helper.c
98
+++ b/target/arm/helper.c
99
@@ -XXX,XX +XXX,XX @@
100
#include "sysemu/cpus.h"
101
#include "sysemu/cpu-timers.h"
102
#include "sysemu/kvm.h"
103
-#include "sysemu/tcg.h"
104
#include "qemu/range.h"
105
#include "qapi/qapi-commands-machine-target.h"
106
#include "qapi/error.h"
107
diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c
108
index XXXXXXX..XXXXXXX 100644
109
--- a/target/s390x/cpu_models_sysemu.c
110
+++ b/target/s390x/cpu_models_sysemu.c
111
@@ -XXX,XX +XXX,XX @@
112
#include "s390x-internal.h"
113
#include "kvm/kvm_s390x.h"
114
#include "sysemu/kvm.h"
115
-#include "sysemu/tcg.h"
116
#include "qapi/error.h"
117
#include "qapi/visitor.h"
118
#include "qapi/qmp/qerror.h"
119
diff --git a/target/s390x/helper.c b/target/s390x/helper.c
120
index XXXXXXX..XXXXXXX 100644
121
--- a/target/s390x/helper.c
122
+++ b/target/s390x/helper.c
123
@@ -XXX,XX +XXX,XX @@
124
#include "hw/s390x/pv.h"
125
#include "sysemu/hw_accel.h"
126
#include "sysemu/runstate.h"
127
-#include "sysemu/tcg.h"
128
129
void s390x_tod_timer(void *opaque)
130
{
131
--
24
--
132
2.34.1
25
2.34.1
diff view generated by jsdifflib