1
The following changes since commit 7c18f2d663521f1b31b821a13358ce38075eaf7d:
1
Pretty small still, but there are two patches that ought
2
to get backported to stable, so no point in delaying.
2
3
3
Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging (2023-04-29 23:07:17 +0100)
4
r~
5
6
The following changes since commit a5ba0a7e4e150d1350a041f0d0ef9ca6c8d7c307:
7
8
Merge tag 'pull-aspeed-20241211' of https://github.com/legoater/qemu into staging (2024-12-11 15:16:47 +0000)
4
9
5
are available in the Git repository at:
10
are available in the Git repository at:
6
11
7
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230502
12
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20241212
8
13
9
for you to fetch changes up to bdc7fba1c5a29ae218b45353daac9308fe1aae82:
14
for you to fetch changes up to 7ac87b14a92234b6a89b701b4043ad6cf8bdcccf:
10
15
11
tcg: Introduce tcg_out_movext2 (2023-05-02 12:15:41 +0100)
16
target/sparc: Use memcpy() and remove memcpy32() (2024-12-12 14:28:38 -0600)
12
17
13
----------------------------------------------------------------
18
----------------------------------------------------------------
14
Misc tcg-related patch queue.
19
tcg: Reset free_temps before tcg_optimize
20
tcg/riscv: Fix StoreStore barrier generation
21
include/exec: Introduce fpst alias in helper-head.h.inc
22
target/sparc: Use memcpy() and remove memcpy32()
15
23
16
----------------------------------------------------------------
24
----------------------------------------------------------------
17
Dickon Hood (1):
25
Philippe Mathieu-Daudé (1):
18
qemu/bitops.h: Limit rotate amounts
26
target/sparc: Use memcpy() and remove memcpy32()
19
27
20
Kiran Ostrolenk (1):
28
Richard Henderson (2):
21
qemu/host-utils.h: Add clz and ctz functions for lower-bit integers
29
tcg: Reset free_temps before tcg_optimize
30
include/exec: Introduce fpst alias in helper-head.h.inc
22
31
23
Nazar Kazakov (2):
32
Roman Artemev (1):
24
tcg: Add tcg_gen_gvec_andcs
33
tcg/riscv: Fix StoreStore barrier generation
25
tcg: Add tcg_gen_gvec_rotrs
26
34
27
Richard Henderson (7):
35
include/tcg/tcg-temp-internal.h | 6 ++++++
28
softmmu: Tidy dirtylimit_dirty_ring_full_time
36
accel/tcg/plugin-gen.c | 2 +-
29
qemu/int128: Re-shuffle Int128Alias members
37
target/sparc/win_helper.c | 26 ++++++++------------------
30
migration/xbzrle: Use __attribute__((target)) for avx512
38
tcg/tcg.c | 5 ++++-
31
accel/tcg: Add cpu_ld*_code_mmu
39
include/exec/helper-head.h.inc | 3 +++
32
tcg/loongarch64: Conditionalize tcg_out_exts_i32_i64
40
tcg/riscv/tcg-target.c.inc | 2 +-
33
tcg/mips: Conditionalize tcg_out_exts_i32_i64
41
6 files changed, 23 insertions(+), 21 deletions(-)
34
tcg: Introduce tcg_out_movext2
35
42
36
Weiwei Li (1):
37
accel/tcg: Uncache the host address for instruction fetch when tlb size < 1
38
39
meson.build | 5 +--
40
accel/tcg/tcg-runtime.h | 1 +
41
include/exec/cpu_ldst.h | 9 ++++++
42
include/qemu/bitops.h | 24 +++++++++-----
43
include/qemu/host-utils.h | 54 +++++++++++++++++++++++++++++++
44
include/qemu/int128.h | 4 +--
45
include/tcg/tcg-op-gvec.h | 4 +++
46
accel/tcg/cputlb.c | 53 ++++++++++++++++++++++++++++++
47
accel/tcg/tcg-runtime-gvec.c | 11 +++++++
48
accel/tcg/user-exec.c | 58 +++++++++++++++++++++++++++++++++
49
migration/xbzrle.c | 9 +++---
50
softmmu/dirtylimit.c | 15 ++++++---
51
tcg/tcg-op-gvec.c | 28 ++++++++++++++++
52
tcg/tcg.c | 69 +++++++++++++++++++++++++++++++++++++---
53
tcg/arm/tcg-target.c.inc | 44 +++++++++++--------------
54
tcg/i386/tcg-target.c.inc | 19 +++++------
55
tcg/loongarch64/tcg-target.c.inc | 4 ++-
56
tcg/mips/tcg-target.c.inc | 4 ++-
57
18 files changed, 347 insertions(+), 68 deletions(-)
diff view generated by jsdifflib
1
This is common code in most qemu_{ld,st} slow paths, moving two
1
When allocating new temps during tcg_optmize, do not re-use
2
registers when there may be overlap between sources and destinations.
2
any EBB temps that were used within the TB. We do not have
3
At present, this is only used by 32-bit hosts for 64-bit data,
3
any idea what span of the TB in which the temp was live.
4
but will shortly be used for more than that.
5
4
5
Introduce tcg_temp_ebb_reset_freed and use before tcg_optimize,
6
as well as replacing the equivalent in plugin_gen_inject and
7
tcg_func_start.
8
9
Cc: qemu-stable@nongnu.org
10
Fixes: fb04ab7ddd8 ("tcg/optimize: Lower TCG_COND_TST{EQ,NE} if unsupported")
11
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2711
12
Reported-by: wannacu <wannacu2049@gmail.com>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
15
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
16
---
9
tcg/tcg.c | 69 ++++++++++++++++++++++++++++++++++++---
17
include/tcg/tcg-temp-internal.h | 6 ++++++
10
tcg/arm/tcg-target.c.inc | 44 ++++++++++---------------
18
accel/tcg/plugin-gen.c | 2 +-
11
tcg/i386/tcg-target.c.inc | 19 +++++------
19
tcg/tcg.c | 5 ++++-
12
3 files changed, 90 insertions(+), 42 deletions(-)
20
3 files changed, 11 insertions(+), 2 deletions(-)
13
21
22
diff --git a/include/tcg/tcg-temp-internal.h b/include/tcg/tcg-temp-internal.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/include/tcg/tcg-temp-internal.h
25
+++ b/include/tcg/tcg-temp-internal.h
26
@@ -XXX,XX +XXX,XX @@ TCGv_i64 tcg_temp_ebb_new_i64(void);
27
TCGv_ptr tcg_temp_ebb_new_ptr(void);
28
TCGv_i128 tcg_temp_ebb_new_i128(void);
29
30
+/* Forget all freed EBB temps, so that new allocations produce new temps. */
31
+static inline void tcg_temp_ebb_reset_freed(TCGContext *s)
32
+{
33
+ memset(s->free_temps, 0, sizeof(s->free_temps));
34
+}
35
+
36
#endif /* TCG_TEMP_FREE_H */
37
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/accel/tcg/plugin-gen.c
40
+++ b/accel/tcg/plugin-gen.c
41
@@ -XXX,XX +XXX,XX @@ static void plugin_gen_inject(struct qemu_plugin_tb *plugin_tb)
42
* that might be live within the existing opcode stream.
43
* The simplest solution is to release them all and create new.
44
*/
45
- memset(tcg_ctx->free_temps, 0, sizeof(tcg_ctx->free_temps));
46
+ tcg_temp_ebb_reset_freed(tcg_ctx);
47
48
QTAILQ_FOREACH_SAFE(op, &tcg_ctx->ops, link, next) {
49
switch (op->opc) {
14
diff --git a/tcg/tcg.c b/tcg/tcg.c
50
diff --git a/tcg/tcg.c b/tcg/tcg.c
15
index XXXXXXX..XXXXXXX 100644
51
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/tcg.c
52
--- a/tcg/tcg.c
17
+++ b/tcg/tcg.c
53
+++ b/tcg/tcg.c
18
@@ -XXX,XX +XXX,XX @@ static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
54
@@ -XXX,XX +XXX,XX @@ void tcg_func_start(TCGContext *s)
19
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
55
s->nb_temps = s->nb_globals;
20
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
56
21
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
57
/* No temps have been previously allocated for size or locality. */
22
-static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
58
- memset(s->free_temps, 0, sizeof(s->free_temps));
23
- __attribute__((unused));
59
+ tcg_temp_ebb_reset_freed(s);
24
+static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
60
25
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
61
/* No constant temps have been previously allocated. */
26
static void tcg_out_goto_tb(TCGContext *s, int which);
62
for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
27
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
63
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
28
@@ -XXX,XX +XXX,XX @@ void tcg_raise_tb_overflow(TCGContext *s)
64
}
29
siglongjmp(s->jmp_trans, -2);
65
#endif
30
}
66
31
67
+ /* Do not reuse any EBB that may be allocated within the TB. */
32
+typedef struct TCGMovExtend {
68
+ tcg_temp_ebb_reset_freed(s);
33
+ TCGReg dst;
34
+ TCGReg src;
35
+ TCGType dst_type;
36
+ TCGType src_type;
37
+ MemOp src_ext;
38
+} TCGMovExtend;
39
+
69
+
40
/**
70
tcg_optimize(s);
41
* tcg_out_movext -- move and extend
71
42
* @s: tcg context
72
reachable_code_pass(s);
43
@@ -XXX,XX +XXX,XX @@ void tcg_raise_tb_overflow(TCGContext *s)
44
*
45
* Move or extend @src into @dst, depending on @src_ext and the types.
46
*/
47
-static void __attribute__((unused))
48
-tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
49
- TCGType src_type, MemOp src_ext, TCGReg src)
50
+static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
51
+ TCGType src_type, MemOp src_ext, TCGReg src)
52
{
53
switch (src_ext) {
54
case MO_UB:
55
@@ -XXX,XX +XXX,XX @@ tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
56
}
57
}
58
59
+/* Minor variations on a theme, using a structure. */
60
+static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
61
+ TCGReg src)
62
+{
63
+ tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
64
+}
65
+
66
+static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
67
+{
68
+ tcg_out_movext1_new_src(s, i, i->src);
69
+}
70
+
71
+/**
72
+ * tcg_out_movext2 -- move and extend two pair
73
+ * @s: tcg context
74
+ * @i1: first move description
75
+ * @i2: second move description
76
+ * @scratch: temporary register, or -1 for none
77
+ *
78
+ * As tcg_out_movext, for both @i1 and @i2, caring for overlap
79
+ * between the sources and destinations.
80
+ */
81
+
82
+static void __attribute__((unused))
83
+tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
84
+ const TCGMovExtend *i2, int scratch)
85
+{
86
+ TCGReg src1 = i1->src;
87
+ TCGReg src2 = i2->src;
88
+
89
+ if (i1->dst != src2) {
90
+ tcg_out_movext1(s, i1);
91
+ tcg_out_movext1(s, i2);
92
+ return;
93
+ }
94
+ if (i2->dst == src1) {
95
+ TCGType src1_type = i1->src_type;
96
+ TCGType src2_type = i2->src_type;
97
+
98
+ if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
99
+ /* The data is now in the correct registers, now extend. */
100
+ src1 = i2->src;
101
+ src2 = i1->src;
102
+ } else {
103
+ tcg_debug_assert(scratch >= 0);
104
+ tcg_out_mov(s, src1_type, scratch, src1);
105
+ src1 = scratch;
106
+ }
107
+ }
108
+ tcg_out_movext1_new_src(s, i2, src2);
109
+ tcg_out_movext1_new_src(s, i1, src1);
110
+}
111
+
112
#define C_PFX1(P, A) P##A
113
#define C_PFX2(P, A, B) P##A##_##B
114
#define C_PFX3(P, A, B, C) P##A##_##B##_##C
115
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
116
index XXXXXXX..XXXXXXX 100644
117
--- a/tcg/arm/tcg-target.c.inc
118
+++ b/tcg/arm/tcg-target.c.inc
119
@@ -XXX,XX +XXX,XX @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
120
121
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
122
{
123
- TCGReg argreg, datalo, datahi;
124
+ TCGReg argreg;
125
MemOpIdx oi = lb->oi;
126
MemOp opc = get_memop(oi);
127
128
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
129
/* Use the canonical unsigned helpers and minimize icache usage. */
130
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
131
132
- datalo = lb->datalo_reg;
133
- datahi = lb->datahi_reg;
134
if ((opc & MO_SIZE) == MO_64) {
135
- if (datalo != TCG_REG_R1) {
136
- tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
137
- tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
138
- } else if (datahi != TCG_REG_R0) {
139
- tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
140
- tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
141
- } else {
142
- tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
143
- tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
144
- tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
145
- }
146
+ TCGMovExtend ext[2] = {
147
+ { .dst = lb->datalo_reg, .dst_type = TCG_TYPE_I32,
148
+ .src = TCG_REG_R0, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
149
+ { .dst = lb->datahi_reg, .dst_type = TCG_TYPE_I32,
150
+ .src = TCG_REG_R1, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
151
+ };
152
+ tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
153
} else {
154
- tcg_out_movext(s, TCG_TYPE_I32, datalo,
155
+ tcg_out_movext(s, TCG_TYPE_I32, lb->datalo_reg,
156
TCG_TYPE_I32, opc & MO_SSIZE, TCG_REG_R0);
157
}
158
159
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
160
161
if (TARGET_LONG_BITS == 64) {
162
/* 64-bit target address is aligned into R2:R3. */
163
- if (l->addrhi_reg != TCG_REG_R2) {
164
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg);
165
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg);
166
- } else if (l->addrlo_reg != TCG_REG_R3) {
167
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg);
168
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg);
169
- } else {
170
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, TCG_REG_R2);
171
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, TCG_REG_R3);
172
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, TCG_REG_R1);
173
- }
174
+ TCGMovExtend ext[2] = {
175
+ { .dst = TCG_REG_R2, .dst_type = TCG_TYPE_I32,
176
+ .src = l->addrlo_reg,
177
+ .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
178
+ { .dst = TCG_REG_R3, .dst_type = TCG_TYPE_I32,
179
+ .src = l->addrhi_reg,
180
+ .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
181
+ };
182
+ tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
183
} else {
184
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg);
185
}
186
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
187
index XXXXXXX..XXXXXXX 100644
188
--- a/tcg/i386/tcg-target.c.inc
189
+++ b/tcg/i386/tcg-target.c.inc
190
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
191
{
192
MemOpIdx oi = l->oi;
193
MemOp opc = get_memop(oi);
194
- TCGReg data_reg;
195
tcg_insn_unit **label_ptr = &l->label_ptr[0];
196
197
/* resolve label address */
198
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
199
200
tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
201
202
- data_reg = l->datalo_reg;
203
if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
204
- if (data_reg == TCG_REG_EDX) {
205
- /* xchg %edx, %eax */
206
- tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
207
- tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
208
- } else {
209
- tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
210
- tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
211
- }
212
+ TCGMovExtend ext[2] = {
213
+ { .dst = l->datalo_reg, .dst_type = TCG_TYPE_I32,
214
+ .src = TCG_REG_EAX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
215
+ { .dst = l->datahi_reg, .dst_type = TCG_TYPE_I32,
216
+ .src = TCG_REG_EDX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
217
+ };
218
+ tcg_out_movext2(s, &ext[0], &ext[1], -1);
219
} else {
220
- tcg_out_movext(s, l->type, data_reg,
221
+ tcg_out_movext(s, l->type, l->datalo_reg,
222
TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_EAX);
223
}
224
225
--
73
--
226
2.34.1
74
2.43.0
227
75
228
76
diff view generated by jsdifflib
1
Since TCG_TYPE_I32 values are kept sign-extended in registers, we need not
1
From: Roman Artemev <roman.artemev@syntacore.com>
2
extend if the register matches. This is already relied upon by comparisons.
3
2
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
On RISC-V to StoreStore barrier corresponds
4
`fence w, w` not `fence r, r`
5
6
Cc: qemu-stable@nongnu.org
7
Fixes: efbea94c76b ("tcg/riscv: Add slowpath load and store instructions")
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Denis Tomashev <denis.tomashev@syntacore.com>
10
Signed-off-by: Roman Artemev <roman.artemev@syntacore.com>
11
Message-ID: <e2f2131e294a49e79959d4fa9ec02cf4@syntacore.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
13
---
7
tcg/mips/tcg-target.c.inc | 4 +++-
14
tcg/riscv/tcg-target.c.inc | 2 +-
8
1 file changed, 3 insertions(+), 1 deletion(-)
15
1 file changed, 1 insertion(+), 1 deletion(-)
9
16
10
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
17
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/mips/tcg-target.c.inc
19
--- a/tcg/riscv/tcg-target.c.inc
13
+++ b/tcg/mips/tcg-target.c.inc
20
+++ b/tcg/riscv/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rs)
21
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
15
22
insn |= 0x02100000;
16
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs)
23
}
17
{
24
if (a0 & TCG_MO_ST_ST) {
18
- tcg_out_ext32s(s, rd, rs);
25
- insn |= 0x02200000;
19
+ if (rd != rs) {
26
+ insn |= 0x01100000;
20
+ tcg_out_ext32s(s, rd, rs);
27
}
21
+ }
28
tcg_out32(s, insn);
22
}
29
}
23
24
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs)
25
--
30
--
26
2.34.1
31
2.43.0
27
28
diff view generated by jsdifflib
1
Since TCG_TYPE_I32 values are kept sign-extended in registers,
1
This allows targets to declare that the helper requires a
2
via ".w" instructions, we need not extend if the register matches.
2
float_status pointer and instead of a generic void pointer.
3
This is already relied upon by comparisons.
4
3
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/loongarch64/tcg-target.c.inc | 4 +++-
7
include/exec/helper-head.h.inc | 3 +++
9
1 file changed, 3 insertions(+), 1 deletion(-)
8
1 file changed, 3 insertions(+)
10
9
11
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
10
diff --git a/include/exec/helper-head.h.inc b/include/exec/helper-head.h.inc
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/loongarch64/tcg-target.c.inc
12
--- a/include/exec/helper-head.h.inc
14
+++ b/tcg/loongarch64/tcg-target.c.inc
13
+++ b/include/exec/helper-head.h.inc
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg)
14
@@ -XXX,XX +XXX,XX @@
16
15
#define dh_alias_ptr ptr
17
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
16
#define dh_alias_cptr ptr
18
{
17
#define dh_alias_env ptr
19
- tcg_out_ext32s(s, ret, arg);
18
+#define dh_alias_fpst ptr
20
+ if (ret != arg) {
19
#define dh_alias_void void
21
+ tcg_out_ext32s(s, ret, arg);
20
#define dh_alias_noreturn noreturn
22
+ }
21
#define dh_alias(t) glue(dh_alias_, t)
23
}
22
@@ -XXX,XX +XXX,XX @@
24
23
#define dh_ctype_ptr void *
25
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
24
#define dh_ctype_cptr const void *
25
#define dh_ctype_env CPUArchState *
26
+#define dh_ctype_fpst float_status *
27
#define dh_ctype_void void
28
#define dh_ctype_noreturn G_NORETURN void
29
#define dh_ctype(t) dh_ctype_##t
30
@@ -XXX,XX +XXX,XX @@
31
#define dh_typecode_f64 dh_typecode_i64
32
#define dh_typecode_cptr dh_typecode_ptr
33
#define dh_typecode_env dh_typecode_ptr
34
+#define dh_typecode_fpst dh_typecode_ptr
35
#define dh_typecode(t) dh_typecode_##t
36
37
#define dh_callflag_i32 0
26
--
38
--
27
2.34.1
39
2.43.0
28
40
29
41
diff view generated by jsdifflib
1
Drop inline marker: let compiler decide.
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
2
3
Change return type to uint64_t: this matches the computation in the
3
Rather than manually copying each register, use
4
return statement and the local variable assignment in the caller.
4
the libc memcpy(), which is well optimized nowadays.
5
5
6
Rename local to dirty_ring_size_MB to fix typo.
6
Suggested-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
7
Simplify conversion to MiB via qemu_target_page_bits and right shift.
7
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
8
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Reviewed-by: Thomas Huth <thuth@redhat.com>
10
Message-ID: <20241205205418.67613-1-philmd@linaro.org>
11
Reviewed-by: Juan Quintela <quintela@redhat.com>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
12
---
14
softmmu/dirtylimit.c | 15 ++++++++++-----
13
target/sparc/win_helper.c | 26 ++++++++------------------
15
1 file changed, 10 insertions(+), 5 deletions(-)
14
1 file changed, 8 insertions(+), 18 deletions(-)
16
15
17
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
16
diff --git a/target/sparc/win_helper.c b/target/sparc/win_helper.c
18
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
19
--- a/softmmu/dirtylimit.c
18
--- a/target/sparc/win_helper.c
20
+++ b/softmmu/dirtylimit.c
19
+++ b/target/sparc/win_helper.c
21
@@ -XXX,XX +XXX,XX @@ bool dirtylimit_vcpu_index_valid(int cpu_index)
20
@@ -XXX,XX +XXX,XX @@
22
cpu_index >= ms->smp.max_cpus);
21
#include "exec/helper-proto.h"
22
#include "trace.h"
23
24
-static inline void memcpy32(target_ulong *dst, const target_ulong *src)
25
-{
26
- dst[0] = src[0];
27
- dst[1] = src[1];
28
- dst[2] = src[2];
29
- dst[3] = src[3];
30
- dst[4] = src[4];
31
- dst[5] = src[5];
32
- dst[6] = src[6];
33
- dst[7] = src[7];
34
-}
35
-
36
void cpu_set_cwp(CPUSPARCState *env, int new_cwp)
37
{
38
/* put the modified wrap registers at their proper location */
39
if (env->cwp == env->nwindows - 1) {
40
- memcpy32(env->regbase, env->regbase + env->nwindows * 16);
41
+ memcpy(env->regbase, env->regbase + env->nwindows * 16,
42
+ sizeof(env->gregs));
43
}
44
env->cwp = new_cwp;
45
46
/* put the wrap registers at their temporary location */
47
if (new_cwp == env->nwindows - 1) {
48
- memcpy32(env->regbase + env->nwindows * 16, env->regbase);
49
+ memcpy(env->regbase + env->nwindows * 16, env->regbase,
50
+ sizeof(env->gregs));
51
}
52
env->regwptr = env->regbase + (new_cwp * 16);
23
}
53
}
24
54
@@ -XXX,XX +XXX,XX @@ void cpu_gl_switch_gregs(CPUSPARCState *env, uint32_t new_gl)
25
-static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
55
dst = get_gl_gregset(env, env->gl);
26
+static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
56
27
{
57
if (src != dst) {
28
static uint64_t max_dirtyrate;
58
- memcpy32(dst, env->gregs);
29
- uint32_t dirty_ring_size = kvm_dirty_ring_size();
59
- memcpy32(env->gregs, src);
30
- uint64_t dirty_ring_size_meory_MB =
60
+ memcpy(dst, env->gregs, sizeof(env->gregs));
31
- dirty_ring_size * qemu_target_page_size() >> 20;
61
+ memcpy(env->gregs, src, sizeof(env->gregs));
32
+ unsigned target_page_bits = qemu_target_page_bits();
33
+ uint64_t dirty_ring_size_MB;
34
+
35
+ /* So far, the largest (non-huge) page size is 64k, i.e. 16 bits. */
36
+ assert(target_page_bits < 20);
37
+
38
+ /* Convert ring size (pages) to MiB (2**20). */
39
+ dirty_ring_size_MB = kvm_dirty_ring_size() >> (20 - target_page_bits);
40
41
if (max_dirtyrate < dirtyrate) {
42
max_dirtyrate = dirtyrate;
43
}
62
}
44
45
- return dirty_ring_size_meory_MB * 1000000 / max_dirtyrate;
46
+ return dirty_ring_size_MB * 1000000 / max_dirtyrate;
47
}
63
}
48
64
49
static inline bool dirtylimit_done(uint64_t quota,
65
@@ -XXX,XX +XXX,XX @@ void cpu_change_pstate(CPUSPARCState *env, uint32_t new_pstate)
66
/* Switch global register bank */
67
src = get_gregset(env, new_pstate_regs);
68
dst = get_gregset(env, pstate_regs);
69
- memcpy32(dst, env->gregs);
70
- memcpy32(env->gregs, src);
71
+ memcpy(dst, env->gregs, sizeof(env->gregs));
72
+ memcpy(env->gregs, src, sizeof(env->gregs));
73
} else {
74
trace_win_helper_no_switch_pstate(new_pstate_regs);
75
}
50
--
76
--
51
2.34.1
77
2.43.0
52
78
53
79
diff view generated by jsdifflib
Deleted patch
1
From: Weiwei Li <liweiwei@iscas.ac.cn>
2
1
3
When PMP entry overlap part of the page, we'll set the tlb_size to 1, which
4
will make the address in tlb entry set with TLB_INVALID_MASK, and the next
5
access will again go through tlb_fill.However, this way will not work in
6
tb_gen_code() => get_page_addr_code_hostp(): the TLB host address will be
7
cached, and the following instructions can use this host address directly
8
which may lead to the bypass of PMP related check.
9
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1542.
10
11
Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
12
Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
13
Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
14
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
15
Message-Id: <20230422130329.23555-6-liweiwei@iscas.ac.cn>
16
---
17
accel/tcg/cputlb.c | 5 +++++
18
1 file changed, 5 insertions(+)
19
20
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/accel/tcg/cputlb.c
23
+++ b/accel/tcg/cputlb.c
24
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
25
if (p == NULL) {
26
return -1;
27
}
28
+
29
+ if (full->lg_page_size < TARGET_PAGE_BITS) {
30
+ return -1;
31
+ }
32
+
33
if (hostp) {
34
*hostp = p;
35
}
36
--
37
2.34.1
diff view generated by jsdifflib
Deleted patch
1
From: Dickon Hood <dickon.hood@codethink.co.uk>
2
1
3
Rotates have been fixed up to only allow for reasonable rotate amounts
4
(ie, no rotates >7 on an 8b value etc.) This fixes a problem with riscv
5
vector rotate instructions.
6
7
Signed-off-by: Dickon Hood <dickon.hood@codethink.co.uk>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-Id: <20230428144757.57530-9-lawrence.hunter@codethink.co.uk>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
include/qemu/bitops.h | 24 ++++++++++++++++--------
13
1 file changed, 16 insertions(+), 8 deletions(-)
14
15
diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/qemu/bitops.h
18
+++ b/include/qemu/bitops.h
19
@@ -XXX,XX +XXX,XX @@ static inline unsigned long find_first_zero_bit(const unsigned long *addr,
20
*/
21
static inline uint8_t rol8(uint8_t word, unsigned int shift)
22
{
23
- return (word << shift) | (word >> ((8 - shift) & 7));
24
+ shift &= 7;
25
+ return (word << shift) | (word >> (8 - shift));
26
}
27
28
/**
29
@@ -XXX,XX +XXX,XX @@ static inline uint8_t rol8(uint8_t word, unsigned int shift)
30
*/
31
static inline uint8_t ror8(uint8_t word, unsigned int shift)
32
{
33
- return (word >> shift) | (word << ((8 - shift) & 7));
34
+ shift &= 7;
35
+ return (word >> shift) | (word << (8 - shift));
36
}
37
38
/**
39
@@ -XXX,XX +XXX,XX @@ static inline uint8_t ror8(uint8_t word, unsigned int shift)
40
*/
41
static inline uint16_t rol16(uint16_t word, unsigned int shift)
42
{
43
- return (word << shift) | (word >> ((16 - shift) & 15));
44
+ shift &= 15;
45
+ return (word << shift) | (word >> (16 - shift));
46
}
47
48
/**
49
@@ -XXX,XX +XXX,XX @@ static inline uint16_t rol16(uint16_t word, unsigned int shift)
50
*/
51
static inline uint16_t ror16(uint16_t word, unsigned int shift)
52
{
53
- return (word >> shift) | (word << ((16 - shift) & 15));
54
+ shift &= 15;
55
+ return (word >> shift) | (word << (16 - shift));
56
}
57
58
/**
59
@@ -XXX,XX +XXX,XX @@ static inline uint16_t ror16(uint16_t word, unsigned int shift)
60
*/
61
static inline uint32_t rol32(uint32_t word, unsigned int shift)
62
{
63
- return (word << shift) | (word >> ((32 - shift) & 31));
64
+ shift &= 31;
65
+ return (word << shift) | (word >> (32 - shift));
66
}
67
68
/**
69
@@ -XXX,XX +XXX,XX @@ static inline uint32_t rol32(uint32_t word, unsigned int shift)
70
*/
71
static inline uint32_t ror32(uint32_t word, unsigned int shift)
72
{
73
- return (word >> shift) | (word << ((32 - shift) & 31));
74
+ shift &= 31;
75
+ return (word >> shift) | (word << (32 - shift));
76
}
77
78
/**
79
@@ -XXX,XX +XXX,XX @@ static inline uint32_t ror32(uint32_t word, unsigned int shift)
80
*/
81
static inline uint64_t rol64(uint64_t word, unsigned int shift)
82
{
83
- return (word << shift) | (word >> ((64 - shift) & 63));
84
+ shift &= 63;
85
+ return (word << shift) | (word >> (64 - shift));
86
}
87
88
/**
89
@@ -XXX,XX +XXX,XX @@ static inline uint64_t rol64(uint64_t word, unsigned int shift)
90
*/
91
static inline uint64_t ror64(uint64_t word, unsigned int shift)
92
{
93
- return (word >> shift) | (word << ((64 - shift) & 63));
94
+ shift &= 63;
95
+ return (word >> shift) | (word << (64 - shift));
96
}
97
98
/**
99
--
100
2.34.1
diff view generated by jsdifflib
Deleted patch
1
From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
2
1
3
This is for use in the RISC-V vclz and vctz instructions (implemented in
4
proceeding commit).
5
6
Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-Id: <20230428144757.57530-11-lawrence.hunter@codethink.co.uk>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
include/qemu/host-utils.h | 54 +++++++++++++++++++++++++++++++++++++++
12
1 file changed, 54 insertions(+)
13
14
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/include/qemu/host-utils.h
17
+++ b/include/qemu/host-utils.h
18
@@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
19
}
20
#endif
21
22
+/**
23
+ * clz8 - count leading zeros in a 8-bit value.
24
+ * @val: The value to search
25
+ *
26
+ * Returns 8 if the value is zero. Note that the GCC builtin is
27
+ * undefined if the value is zero.
28
+ *
29
+ * Note that the GCC builtin will upcast its argument to an `unsigned int`
30
+ * so this function subtracts off the number of prepended zeroes.
31
+ */
32
+static inline int clz8(uint8_t val)
33
+{
34
+ return val ? __builtin_clz(val) - 24 : 8;
35
+}
36
+
37
+/**
38
+ * clz16 - count leading zeros in a 16-bit value.
39
+ * @val: The value to search
40
+ *
41
+ * Returns 16 if the value is zero. Note that the GCC builtin is
42
+ * undefined if the value is zero.
43
+ *
44
+ * Note that the GCC builtin will upcast its argument to an `unsigned int`
45
+ * so this function subtracts off the number of prepended zeroes.
46
+ */
47
+static inline int clz16(uint16_t val)
48
+{
49
+ return val ? __builtin_clz(val) - 16 : 16;
50
+}
51
+
52
/**
53
* clz32 - count leading zeros in a 32-bit value.
54
* @val: The value to search
55
@@ -XXX,XX +XXX,XX @@ static inline int clo64(uint64_t val)
56
return clz64(~val);
57
}
58
59
+/**
60
+ * ctz8 - count trailing zeros in a 8-bit value.
61
+ * @val: The value to search
62
+ *
63
+ * Returns 8 if the value is zero. Note that the GCC builtin is
64
+ * undefined if the value is zero.
65
+ */
66
+static inline int ctz8(uint8_t val)
67
+{
68
+ return val ? __builtin_ctz(val) : 8;
69
+}
70
+
71
+/**
72
+ * ctz16 - count trailing zeros in a 16-bit value.
73
+ * @val: The value to search
74
+ *
75
+ * Returns 16 if the value is zero. Note that the GCC builtin is
76
+ * undefined if the value is zero.
77
+ */
78
+static inline int ctz16(uint16_t val)
79
+{
80
+ return val ? __builtin_ctz(val) : 16;
81
+}
82
+
83
/**
84
* ctz32 - count trailing zeros in a 32-bit value.
85
* @val: The value to search
86
--
87
2.34.1
diff view generated by jsdifflib
Deleted patch
1
From: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
2
1
3
Add tcg expander and helper functions for and-compliment
4
vector with scalar operand.
5
6
Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
7
Message-Id: <20230428144757.57530-10-lawrence.hunter@codethink.co.uk>
8
[rth: Split out of larger patch.]
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
accel/tcg/tcg-runtime.h | 1 +
12
include/tcg/tcg-op-gvec.h | 2 ++
13
accel/tcg/tcg-runtime-gvec.c | 11 +++++++++++
14
tcg/tcg-op-gvec.c | 17 +++++++++++++++++
15
4 files changed, 31 insertions(+)
16
17
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/accel/tcg/tcg-runtime.h
20
+++ b/accel/tcg/tcg-runtime.h
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_nor, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
22
DEF_HELPER_FLAGS_4(gvec_eqv, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
23
24
DEF_HELPER_FLAGS_4(gvec_ands, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
25
+DEF_HELPER_FLAGS_4(gvec_andcs, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
26
DEF_HELPER_FLAGS_4(gvec_xors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
27
DEF_HELPER_FLAGS_4(gvec_ors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
28
29
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
30
index XXXXXXX..XXXXXXX 100644
31
--- a/include/tcg/tcg-op-gvec.h
32
+++ b/include/tcg/tcg-op-gvec.h
33
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_ori(unsigned vece, uint32_t dofs, uint32_t aofs,
34
35
void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs,
36
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
37
+void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs,
38
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
39
void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs,
40
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
41
void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,
42
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/accel/tcg/tcg-runtime-gvec.c
45
+++ b/accel/tcg/tcg-runtime-gvec.c
46
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
47
clear_high(d, oprsz, desc);
48
}
49
50
+void HELPER(gvec_andcs)(void *d, void *a, uint64_t b, uint32_t desc)
51
+{
52
+ intptr_t oprsz = simd_oprsz(desc);
53
+ intptr_t i;
54
+
55
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
56
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & ~b;
57
+ }
58
+ clear_high(d, oprsz, desc);
59
+}
60
+
61
void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
62
{
63
intptr_t oprsz = simd_oprsz(desc);
64
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
65
index XXXXXXX..XXXXXXX 100644
66
--- a/tcg/tcg-op-gvec.c
67
+++ b/tcg/tcg-op-gvec.c
68
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_andi(unsigned vece, uint32_t dofs, uint32_t aofs,
69
tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ands);
70
}
71
72
+void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs,
73
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
74
+{
75
+ static GVecGen2s g = {
76
+ .fni8 = tcg_gen_andc_i64,
77
+ .fniv = tcg_gen_andc_vec,
78
+ .fno = gen_helper_gvec_andcs,
79
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
80
+ .vece = MO_64
81
+ };
82
+
83
+ TCGv_i64 tmp = tcg_temp_ebb_new_i64();
84
+ tcg_gen_dup_i64(vece, tmp, c);
85
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &g);
86
+ tcg_temp_free_i64(tmp);
87
+}
88
+
89
static const GVecGen2s gop_xors = {
90
.fni8 = tcg_gen_xor_i64,
91
.fniv = tcg_gen_xor_vec,
92
--
93
2.34.1
diff view generated by jsdifflib
Deleted patch
1
From: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
2
1
3
Add tcg expander and helper functions for rotate right
4
vector with scalar operand.
5
6
Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
7
Message-Id: <20230428144757.57530-10-lawrence.hunter@codethink.co.uk>
8
[rth: Split out of larger patch; mask rotation count.]
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
include/tcg/tcg-op-gvec.h | 2 ++
12
tcg/tcg-op-gvec.c | 11 +++++++++++
13
2 files changed, 13 insertions(+)
14
15
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/tcg/tcg-op-gvec.h
18
+++ b/include/tcg/tcg-op-gvec.h
19
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs,
20
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
21
void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
22
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
23
+void tcg_gen_gvec_rotrs(unsigned vece, uint32_t dofs, uint32_t aofs,
24
+ TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
25
26
/*
27
* Perform vector shift by vector element, modulo the element size.
28
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/tcg/tcg-op-gvec.c
31
+++ b/tcg/tcg-op-gvec.c
32
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
33
do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
34
}
35
36
+void tcg_gen_gvec_rotrs(unsigned vece, uint32_t dofs, uint32_t aofs,
37
+ TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz)
38
+{
39
+ TCGv_i32 tmp = tcg_temp_ebb_new_i32();
40
+
41
+ tcg_gen_neg_i32(tmp, shift);
42
+ tcg_gen_andi_i32(tmp, tmp, (8 << vece) - 1);
43
+ tcg_gen_gvec_rotls(vece, dofs, aofs, tmp, oprsz, maxsz);
44
+ tcg_temp_free_i32(tmp);
45
+}
46
+
47
/*
48
* Expand D = A << (B % element bits)
49
*
50
--
51
2.34.1
diff view generated by jsdifflib
Deleted patch
1
Clang 14, with --enable-tcg-interpreter errors with
2
1
3
include/qemu/int128.h:487:16: error: alignment of field 'i' (128 bits)
4
does not match the alignment of the first field in transparent union;
5
transparent_union attribute ignored [-Werror,-Wignored-attributes]
6
__int128_t i;
7
^
8
include/qemu/int128.h:486:12: note: alignment of first field is 64 bits
9
Int128 s;
10
^
11
1 error generated.
12
13
By placing the __uint128_t member first, this is avoided.
14
15
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
16
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
17
Message-Id: <20230501204625.277361-1-richard.henderson@linaro.org>
18
---
19
include/qemu/int128.h | 4 ++--
20
1 file changed, 2 insertions(+), 2 deletions(-)
21
22
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/include/qemu/int128.h
25
+++ b/include/qemu/int128.h
26
@@ -XXX,XX +XXX,XX @@ static inline void bswap128s(Int128 *s)
27
*/
28
#ifdef CONFIG_INT128
29
typedef union {
30
- Int128 s;
31
- __int128_t i;
32
__uint128_t u;
33
+ __int128_t i;
34
+ Int128 s;
35
} Int128Alias __attribute__((transparent_union));
36
#else
37
typedef Int128 Int128Alias;
38
--
39
2.34.1
40
41
diff view generated by jsdifflib
Deleted patch
1
Use the attribute, which is supported by clang, instead of
2
the #pragma, which is not supported and, for some reason,
3
also not detected by the meson probe, so we fail by -Werror.
4
1
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Juan Quintela <quintela@redhat.com>
7
Message-Id: <20230501210555.289806-1-richard.henderson@linaro.org>
8
---
9
meson.build | 5 +----
10
migration/xbzrle.c | 9 ++++-----
11
2 files changed, 5 insertions(+), 9 deletions(-)
12
13
diff --git a/meson.build b/meson.build
14
index XXXXXXX..XXXXXXX 100644
15
--- a/meson.build
16
+++ b/meson.build
17
@@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \
18
config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \
19
.require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512BW') \
20
.require(cc.links('''
21
- #pragma GCC push_options
22
- #pragma GCC target("avx512bw")
23
#include <cpuid.h>
24
#include <immintrin.h>
25
- static int bar(void *a) {
26
-
27
+ static int __attribute__((target("avx512bw"))) bar(void *a) {
28
__m512i *x = a;
29
__m512i res= _mm512_abs_epi8(*x);
30
return res[1];
31
diff --git a/migration/xbzrle.c b/migration/xbzrle.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/migration/xbzrle.c
34
+++ b/migration/xbzrle.c
35
@@ -XXX,XX +XXX,XX @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
36
}
37
38
#if defined(CONFIG_AVX512BW_OPT)
39
-#pragma GCC push_options
40
-#pragma GCC target("avx512bw")
41
#include <immintrin.h>
42
-int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
43
- uint8_t *dst, int dlen)
44
+
45
+int __attribute__((target("avx512bw")))
46
+xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
47
+ uint8_t *dst, int dlen)
48
{
49
uint32_t zrun_len = 0, nzrun_len = 0;
50
int d = 0, i = 0, num = 0;
51
@@ -XXX,XX +XXX,XX @@ int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
52
}
53
return d;
54
}
55
-#pragma GCC pop_options
56
#endif
57
--
58
2.34.1
diff view generated by jsdifflib
Deleted patch
1
At least RISC-V has the need to be able to perform a read
2
using execute permissions, outside of translation.
3
Add helpers to facilitate this.
4
1
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Acked-by: Alistair Francis <alistair.francis@wdc.com>
7
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
8
Tested-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
9
Message-Id: <20230325105429.1142530-9-richard.henderson@linaro.org>
10
Message-Id: <20230412114333.118895-9-richard.henderson@linaro.org>
11
---
12
include/exec/cpu_ldst.h | 9 +++++++
13
accel/tcg/cputlb.c | 48 ++++++++++++++++++++++++++++++++++
14
accel/tcg/user-exec.c | 58 +++++++++++++++++++++++++++++++++++++++++
15
3 files changed, 115 insertions(+)
16
17
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/include/exec/cpu_ldst.h
20
+++ b/include/exec/cpu_ldst.h
21
@@ -XXX,XX +XXX,XX @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
22
# define cpu_stq_mmu cpu_stq_le_mmu
23
#endif
24
25
+uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
26
+ MemOpIdx oi, uintptr_t ra);
27
+uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
28
+ MemOpIdx oi, uintptr_t ra);
29
+uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
30
+ MemOpIdx oi, uintptr_t ra);
31
+uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
32
+ MemOpIdx oi, uintptr_t ra);
33
+
34
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr);
35
uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr);
36
uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr);
37
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/accel/tcg/cputlb.c
40
+++ b/accel/tcg/cputlb.c
41
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
42
MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true));
43
return full_ldq_code(env, addr, oi, 0);
44
}
45
+
46
+uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
47
+ MemOpIdx oi, uintptr_t retaddr)
48
+{
49
+ return full_ldub_code(env, addr, oi, retaddr);
50
+}
51
+
52
+uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
53
+ MemOpIdx oi, uintptr_t retaddr)
54
+{
55
+ MemOp mop = get_memop(oi);
56
+ int idx = get_mmuidx(oi);
57
+ uint16_t ret;
58
+
59
+ ret = full_lduw_code(env, addr, make_memop_idx(MO_TEUW, idx), retaddr);
60
+ if ((mop & MO_BSWAP) != MO_TE) {
61
+ ret = bswap16(ret);
62
+ }
63
+ return ret;
64
+}
65
+
66
+uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
67
+ MemOpIdx oi, uintptr_t retaddr)
68
+{
69
+ MemOp mop = get_memop(oi);
70
+ int idx = get_mmuidx(oi);
71
+ uint32_t ret;
72
+
73
+ ret = full_ldl_code(env, addr, make_memop_idx(MO_TEUL, idx), retaddr);
74
+ if ((mop & MO_BSWAP) != MO_TE) {
75
+ ret = bswap32(ret);
76
+ }
77
+ return ret;
78
+}
79
+
80
+uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
81
+ MemOpIdx oi, uintptr_t retaddr)
82
+{
83
+ MemOp mop = get_memop(oi);
84
+ int idx = get_mmuidx(oi);
85
+ uint64_t ret;
86
+
87
+ ret = full_ldq_code(env, addr, make_memop_idx(MO_TEUQ, idx), retaddr);
88
+ if ((mop & MO_BSWAP) != MO_TE) {
89
+ ret = bswap64(ret);
90
+ }
91
+ return ret;
92
+}
93
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
94
index XXXXXXX..XXXXXXX 100644
95
--- a/accel/tcg/user-exec.c
96
+++ b/accel/tcg/user-exec.c
97
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr)
98
return ret;
99
}
100
101
+uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
102
+ MemOpIdx oi, uintptr_t ra)
103
+{
104
+ void *haddr;
105
+ uint8_t ret;
106
+
107
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
108
+ ret = ldub_p(haddr);
109
+ clear_helper_retaddr();
110
+ return ret;
111
+}
112
+
113
+uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
114
+ MemOpIdx oi, uintptr_t ra)
115
+{
116
+ void *haddr;
117
+ uint16_t ret;
118
+
119
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
120
+ ret = lduw_p(haddr);
121
+ clear_helper_retaddr();
122
+ if (get_memop(oi) & MO_BSWAP) {
123
+ ret = bswap16(ret);
124
+ }
125
+ return ret;
126
+}
127
+
128
+uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
129
+ MemOpIdx oi, uintptr_t ra)
130
+{
131
+ void *haddr;
132
+ uint32_t ret;
133
+
134
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
135
+ ret = ldl_p(haddr);
136
+ clear_helper_retaddr();
137
+ if (get_memop(oi) & MO_BSWAP) {
138
+ ret = bswap32(ret);
139
+ }
140
+ return ret;
141
+}
142
+
143
+uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
144
+ MemOpIdx oi, uintptr_t ra)
145
+{
146
+ void *haddr;
147
+ uint64_t ret;
148
+
149
+ validate_memop(oi, MO_BEUQ);
150
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
151
+ ret = ldq_p(haddr);
152
+ clear_helper_retaddr();
153
+ if (get_memop(oi) & MO_BSWAP) {
154
+ ret = bswap64(ret);
155
+ }
156
+ return ret;
157
+}
158
+
159
#include "ldst_common.c.inc"
160
161
/*
162
--
163
2.34.1
diff view generated by jsdifflib