1
The following changes since commit 15df33ceb73cb6bb3c6736cf4d2cff51129ed4b4:
1
Pretty small still, but there are two patches that ought
2
to get backported to stable, so no point in delaying.
2
3
3
Merge remote-tracking branch 'remotes/quic/tags/pull-hex-20220312-1' into staging (2022-03-13 17:29:18 +0000)
4
r~
5
6
The following changes since commit a5ba0a7e4e150d1350a041f0d0ef9ca6c8d7c307:
7
8
Merge tag 'pull-aspeed-20241211' of https://github.com/legoater/qemu into staging (2024-12-11 15:16:47 +0000)
4
9
5
are available in the Git repository at:
10
are available in the Git repository at:
6
11
7
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20220314
12
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20241212
8
13
9
for you to fetch changes up to 76cff100beeae8d3676bb658cccd45ef5ced8aa9:
14
for you to fetch changes up to 7ac87b14a92234b6a89b701b4043ad6cf8bdcccf:
10
15
11
tcg/arm: Don't emit UNPREDICTABLE LDRD with Rm == Rt or Rt+1 (2022-03-14 10:31:51 -0700)
16
target/sparc: Use memcpy() and remove memcpy32() (2024-12-12 14:28:38 -0600)
12
17
13
----------------------------------------------------------------
18
----------------------------------------------------------------
14
Fixes for s390x host vectors
19
tcg: Reset free_temps before tcg_optimize
15
Fix for arm ldrd unpredictable case
20
tcg/riscv: Fix StoreStore barrier generation
21
include/exec: Introduce fpst alias in helper-head.h.inc
22
target/sparc: Use memcpy() and remove memcpy32()
16
23
17
----------------------------------------------------------------
24
----------------------------------------------------------------
18
Richard Henderson (4):
25
Philippe Mathieu-Daudé (1):
19
tcg/s390x: Fix tcg_out_dupi_vec vs VGM
26
target/sparc: Use memcpy() and remove memcpy32()
20
tcg/s390x: Fix INDEX_op_bitsel_vec vs VSEL
21
tcg/s390x: Fix tcg_out_dup_vec vs general registers
22
tcg/arm: Don't emit UNPREDICTABLE LDRD with Rm == Rt or Rt+1
23
27
24
tcg/arm/tcg-target.c.inc | 17 +++++++++++++++--
28
Richard Henderson (2):
25
tcg/s390x/tcg-target.c.inc | 7 ++++---
29
tcg: Reset free_temps before tcg_optimize
26
2 files changed, 19 insertions(+), 5 deletions(-)
30
include/exec: Introduce fpst alias in helper-head.h.inc
31
32
Roman Artemev (1):
33
tcg/riscv: Fix StoreStore barrier generation
34
35
include/tcg/tcg-temp-internal.h | 6 ++++++
36
accel/tcg/plugin-gen.c | 2 +-
37
target/sparc/win_helper.c | 26 ++++++++------------------
38
tcg/tcg.c | 5 ++++-
39
include/exec/helper-head.h.inc | 3 +++
40
tcg/riscv/tcg-target.c.inc | 2 +-
41
6 files changed, 23 insertions(+), 21 deletions(-)
42
diff view generated by jsdifflib
1
We copied the data from the general register input to the
1
When allocating new temps during tcg_optimize, do not re-use
2
vector register output, but have not yet replicated it.
2
any EBB temps that were used within the TB. We do not have
3
We intended to fall through into the vector-vector case,
3
any idea of the span of the TB in which the temp was live.
4
but failed to redirect the input register.
5
4
6
This is caught by an assertion failure in tcg_out_insn_VRIc,
5
Introduce tcg_temp_ebb_reset_freed and use before tcg_optimize,
7
which diagnosed the incorrect register class.
6
as well as replacing the equivalent in plugin_gen_inject and
7
tcg_func_start.
8
8
9
Cc: qemu-stable@nongnu.org
10
Fixes: fb04ab7ddd8 ("tcg/optimize: Lower TCG_COND_TST{EQ,NE} if unsupported")
11
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2711
12
Reported-by: wannacu <wannacu2049@gmail.com>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
15
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
---
16
---
11
tcg/s390x/tcg-target.c.inc | 1 +
17
include/tcg/tcg-temp-internal.h | 6 ++++++
12
1 file changed, 1 insertion(+)
18
accel/tcg/plugin-gen.c | 2 +-
19
tcg/tcg.c | 5 ++++-
20
3 files changed, 11 insertions(+), 2 deletions(-)
13
21
14
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
22
diff --git a/include/tcg/tcg-temp-internal.h b/include/tcg/tcg-temp-internal.h
15
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/s390x/tcg-target.c.inc
24
--- a/include/tcg/tcg-temp-internal.h
17
+++ b/tcg/s390x/tcg-target.c.inc
25
+++ b/include/tcg/tcg-temp-internal.h
18
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
26
@@ -XXX,XX +XXX,XX @@ TCGv_i64 tcg_temp_ebb_new_i64(void);
19
if (vece == MO_64) {
27
TCGv_ptr tcg_temp_ebb_new_ptr(void);
20
return true;
28
TCGv_i128 tcg_temp_ebb_new_i128(void);
21
}
29
22
+ src = dst;
30
+/* Forget all freed EBB temps, so that new allocations produce new temps. */
31
+static inline void tcg_temp_ebb_reset_freed(TCGContext *s)
32
+{
33
+ memset(s->free_temps, 0, sizeof(s->free_temps));
34
+}
35
+
36
#endif /* TCG_TEMP_FREE_H */
37
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/accel/tcg/plugin-gen.c
40
+++ b/accel/tcg/plugin-gen.c
41
@@ -XXX,XX +XXX,XX @@ static void plugin_gen_inject(struct qemu_plugin_tb *plugin_tb)
42
* that might be live within the existing opcode stream.
43
* The simplest solution is to release them all and create new.
44
*/
45
- memset(tcg_ctx->free_temps, 0, sizeof(tcg_ctx->free_temps));
46
+ tcg_temp_ebb_reset_freed(tcg_ctx);
47
48
QTAILQ_FOREACH_SAFE(op, &tcg_ctx->ops, link, next) {
49
switch (op->opc) {
50
diff --git a/tcg/tcg.c b/tcg/tcg.c
51
index XXXXXXX..XXXXXXX 100644
52
--- a/tcg/tcg.c
53
+++ b/tcg/tcg.c
54
@@ -XXX,XX +XXX,XX @@ void tcg_func_start(TCGContext *s)
55
s->nb_temps = s->nb_globals;
56
57
/* No temps have been previously allocated for size or locality. */
58
- memset(s->free_temps, 0, sizeof(s->free_temps));
59
+ tcg_temp_ebb_reset_freed(s);
60
61
/* No constant temps have been previously allocated. */
62
for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
63
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
23
}
64
}
24
65
#endif
25
/*
66
67
+ /* Do not reuse any EBB that may be allocated within the TB. */
68
+ tcg_temp_ebb_reset_freed(s);
69
+
70
tcg_optimize(s);
71
72
reachable_code_pass(s);
26
--
73
--
27
2.25.1
74
2.43.0
75
76
diff view generated by jsdifflib
1
The operands are output in the wrong order: the tcg selector
1
From: Roman Artemev <roman.artemev@syntacore.com>
2
argument is first, whereas the s390x selector argument is last.
3
2
4
Tested-by: Thomas Huth <thuth@redhat.com>
3
On RISC-V, the StoreStore barrier corresponds to
5
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/898
4
`fence w, w` not `fence r, r`
6
Fixes: 9bca986df88 ("tcg/s390x: Implement TCG_TARGET_HAS_bitsel_vec")
5
6
Cc: qemu-stable@nongnu.org
7
Fixes: efbea94c76b ("tcg/riscv: Add slowpath load and store instructions")
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Denis Tomashev <denis.tomashev@syntacore.com>
10
Signed-off-by: Roman Artemev <roman.artemev@syntacore.com>
11
Message-ID: <e2f2131e294a49e79959d4fa9ec02cf4@syntacore.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
13
---
9
tcg/s390x/tcg-target.c.inc | 2 +-
14
tcg/riscv/tcg-target.c.inc | 2 +-
10
1 file changed, 1 insertion(+), 1 deletion(-)
15
1 file changed, 1 insertion(+), 1 deletion(-)
11
16
12
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
17
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/s390x/tcg-target.c.inc
19
--- a/tcg/riscv/tcg-target.c.inc
15
+++ b/tcg/s390x/tcg-target.c.inc
20
+++ b/tcg/riscv/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
21
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
17
break;
22
insn |= 0x02100000;
18
23
}
19
case INDEX_op_bitsel_vec:
24
if (a0 & TCG_MO_ST_ST) {
20
- tcg_out_insn(s, VRRe, VSEL, a0, a1, a2, args[3]);
25
- insn |= 0x02200000;
21
+ tcg_out_insn(s, VRRe, VSEL, a0, a2, args[3], a1);
26
+ insn |= 0x01100000;
22
break;
27
}
23
28
tcg_out32(s, insn);
24
case INDEX_op_cmp_vec:
29
}
25
--
30
--
26
2.25.1
31
2.43.0
diff view generated by jsdifflib
1
The LDRD (register) instruction is UNPREDICTABLE if the Rm register
1
This allows targets to declare that the helper requires a
2
is the same as either Rt or Rt+1 (the two registers being loaded to).
2
float_status pointer instead of a generic void pointer.
3
We weren't making sure we avoided this, with the result that on some
4
host CPUs like the Cortex-A7 we would get a SIGILL because the CPU
5
chooses to UNDEF for this particular UNPREDICTABLE case.
6
3
7
Since we've already checked that datalo is aligned, we can simplify
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
the test vs the Rm operand by aligning it before comparison. Check
9
for the two orderings before falling back to two ldr instructions.
10
11
We don't bother to do anything similar for tcg_out_ldrd_rwb(),
12
because it is only used in tcg_out_tlb_read() with a fixed set of
13
registers which don't overlap.
14
15
There is no equivalent UNPREDICTABLE case for STRD.
16
17
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
18
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/896
19
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
20
---
6
---
21
tcg/arm/tcg-target.c.inc | 17 +++++++++++++++--
7
include/exec/helper-head.h.inc | 3 +++
22
1 file changed, 15 insertions(+), 2 deletions(-)
8
1 file changed, 3 insertions(+)
23
9
24
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
10
diff --git a/include/exec/helper-head.h.inc b/include/exec/helper-head.h.inc
25
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
26
--- a/tcg/arm/tcg-target.c.inc
12
--- a/include/exec/helper-head.h.inc
27
+++ b/tcg/arm/tcg-target.c.inc
13
+++ b/include/exec/helper-head.h.inc
28
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
14
@@ -XXX,XX +XXX,XX @@
29
/* LDRD requires alignment; double-check that. */
15
#define dh_alias_ptr ptr
30
if (get_alignment_bits(opc) >= MO_64
16
#define dh_alias_cptr ptr
31
&& (datalo & 1) == 0 && datahi == datalo + 1) {
17
#define dh_alias_env ptr
32
- tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend);
18
+#define dh_alias_fpst ptr
33
- } else if (scratch_addend) {
19
#define dh_alias_void void
34
+ /*
20
#define dh_alias_noreturn noreturn
35
+ * Rm (the second address op) must not overlap Rt or Rt + 1.
21
#define dh_alias(t) glue(dh_alias_, t)
36
+ * Since datalo is aligned, we can simplify the test via alignment.
22
@@ -XXX,XX +XXX,XX @@
37
+ * Flip the two address arguments if that works.
23
#define dh_ctype_ptr void *
38
+ */
24
#define dh_ctype_cptr const void *
39
+ if ((addend & ~1) != datalo) {
25
#define dh_ctype_env CPUArchState *
40
+ tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend);
26
+#define dh_ctype_fpst float_status *
41
+ break;
27
#define dh_ctype_void void
42
+ }
28
#define dh_ctype_noreturn G_NORETURN void
43
+ if ((addrlo & ~1) != datalo) {
29
#define dh_ctype(t) dh_ctype_##t
44
+ tcg_out_ldrd_r(s, COND_AL, datalo, addend, addrlo);
30
@@ -XXX,XX +XXX,XX @@
45
+ break;
31
#define dh_typecode_f64 dh_typecode_i64
46
+ }
32
#define dh_typecode_cptr dh_typecode_ptr
47
+ }
33
#define dh_typecode_env dh_typecode_ptr
48
+ if (scratch_addend) {
34
+#define dh_typecode_fpst dh_typecode_ptr
49
tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo);
35
#define dh_typecode(t) dh_typecode_##t
50
tcg_out_ld32_12(s, COND_AL, datahi, addend, 4);
36
51
} else {
37
#define dh_callflag_i32 0
52
--
38
--
53
2.25.1
39
2.43.0
54
40
55
41
diff view generated by jsdifflib
1
The immediate operands to VGM were in the wrong order,
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
producing an inverse mask.
3
2
3
Rather than manually copying each register, use
4
the libc memcpy(), which is well optimized nowadays.
5
6
Suggested-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
7
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Message-ID: <20241205205418.67613-1-philmd@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
12
---
6
tcg/s390x/tcg-target.c.inc | 4 ++--
13
target/sparc/win_helper.c | 26 ++++++++------------------
7
1 file changed, 2 insertions(+), 2 deletions(-)
14
1 file changed, 8 insertions(+), 18 deletions(-)
8
15
9
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
16
diff --git a/target/sparc/win_helper.c b/target/sparc/win_helper.c
10
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/s390x/tcg-target.c.inc
18
--- a/target/sparc/win_helper.c
12
+++ b/tcg/s390x/tcg-target.c.inc
19
+++ b/target/sparc/win_helper.c
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
20
@@ -XXX,XX +XXX,XX @@
14
msb = clz32(val);
21
#include "exec/helper-proto.h"
15
lsb = 31 - ctz32(val);
22
#include "trace.h"
16
}
23
17
- tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_32);
24
-static inline void memcpy32(target_ulong *dst, const target_ulong *src)
18
+ tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_32);
25
-{
19
return;
26
- dst[0] = src[0];
20
}
27
- dst[1] = src[1];
28
- dst[2] = src[2];
29
- dst[3] = src[3];
30
- dst[4] = src[4];
31
- dst[5] = src[5];
32
- dst[6] = src[6];
33
- dst[7] = src[7];
34
-}
35
-
36
void cpu_set_cwp(CPUSPARCState *env, int new_cwp)
37
{
38
/* put the modified wrap registers at their proper location */
39
if (env->cwp == env->nwindows - 1) {
40
- memcpy32(env->regbase, env->regbase + env->nwindows * 16);
41
+ memcpy(env->regbase, env->regbase + env->nwindows * 16,
42
+ sizeof(env->gregs));
43
}
44
env->cwp = new_cwp;
45
46
/* put the wrap registers at their temporary location */
47
if (new_cwp == env->nwindows - 1) {
48
- memcpy32(env->regbase + env->nwindows * 16, env->regbase);
49
+ memcpy(env->regbase + env->nwindows * 16, env->regbase,
50
+ sizeof(env->gregs));
51
}
52
env->regwptr = env->regbase + (new_cwp * 16);
53
}
54
@@ -XXX,XX +XXX,XX @@ void cpu_gl_switch_gregs(CPUSPARCState *env, uint32_t new_gl)
55
dst = get_gl_gregset(env, env->gl);
56
57
if (src != dst) {
58
- memcpy32(dst, env->gregs);
59
- memcpy32(env->gregs, src);
60
+ memcpy(dst, env->gregs, sizeof(env->gregs));
61
+ memcpy(env->gregs, src, sizeof(env->gregs));
62
}
63
}
64
65
@@ -XXX,XX +XXX,XX @@ void cpu_change_pstate(CPUSPARCState *env, uint32_t new_pstate)
66
/* Switch global register bank */
67
src = get_gregset(env, new_pstate_regs);
68
dst = get_gregset(env, pstate_regs);
69
- memcpy32(dst, env->gregs);
70
- memcpy32(env->gregs, src);
71
+ memcpy(dst, env->gregs, sizeof(env->gregs));
72
+ memcpy(env->gregs, src, sizeof(env->gregs));
21
} else {
73
} else {
22
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
74
trace_win_helper_no_switch_pstate(new_pstate_regs);
23
msb = clz64(val);
24
lsb = 63 - ctz64(val);
25
}
26
- tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_64);
27
+ tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_64);
28
return;
29
}
30
}
75
}
31
--
76
--
32
2.25.1
77
2.43.0
78
79
diff view generated by jsdifflib