1 | The following changes since commit 15df33ceb73cb6bb3c6736cf4d2cff51129ed4b4: | 1 | Pretty small still, but there are two patches that ought |
---|---|---|---|
2 | to get backported to stable, so no point in delaying. | ||
2 | 3 | ||
3 | Merge remote-tracking branch 'remotes/quic/tags/pull-hex-20220312-1' into staging (2022-03-13 17:29:18 +0000) | 4 | r~ |
5 | |||
6 | The following changes since commit a5ba0a7e4e150d1350a041f0d0ef9ca6c8d7c307: | ||
7 | |||
8 | Merge tag 'pull-aspeed-20241211' of https://github.com/legoater/qemu into staging (2024-12-11 15:16:47 +0000) | ||
4 | 9 | ||
5 | are available in the Git repository at: | 10 | are available in the Git repository at: |
6 | 11 | ||
7 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20220314 | 12 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20241212 |
8 | 13 | ||
9 | for you to fetch changes up to 76cff100beeae8d3676bb658cccd45ef5ced8aa9: | 14 | for you to fetch changes up to 7ac87b14a92234b6a89b701b4043ad6cf8bdcccf: |
10 | 15 | ||
11 | tcg/arm: Don't emit UNPREDICTABLE LDRD with Rm == Rt or Rt+1 (2022-03-14 10:31:51 -0700) | 16 | target/sparc: Use memcpy() and remove memcpy32() (2024-12-12 14:28:38 -0600) |
12 | 17 | ||
13 | ---------------------------------------------------------------- | 18 | ---------------------------------------------------------------- |
14 | Fixes for s390x host vectors | 19 | tcg: Reset free_temps before tcg_optimize |
15 | Fix for arm ldrd unpredictable case | 20 | tcg/riscv: Fix StoreStore barrier generation |
21 | include/exec: Introduce fpst alias in helper-head.h.inc | ||
22 | target/sparc: Use memcpy() and remove memcpy32() | ||
16 | 23 | ||
17 | ---------------------------------------------------------------- | 24 | ---------------------------------------------------------------- |
18 | Richard Henderson (4): | 25 | Philippe Mathieu-Daudé (1): |
19 | tcg/s390x: Fix tcg_out_dupi_vec vs VGM | 26 | target/sparc: Use memcpy() and remove memcpy32() |
20 | tcg/s390x: Fix INDEX_op_bitsel_vec vs VSEL | ||
21 | tcg/s390x: Fix tcg_out_dup_vec vs general registers | ||
22 | tcg/arm: Don't emit UNPREDICTABLE LDRD with Rm == Rt or Rt+1 | ||
23 | 27 | ||
24 | tcg/arm/tcg-target.c.inc | 17 +++++++++++++++-- | 28 | Richard Henderson (2): |
25 | tcg/s390x/tcg-target.c.inc | 7 ++++--- | 29 | tcg: Reset free_temps before tcg_optimize |
26 | 2 files changed, 19 insertions(+), 5 deletions(-) | 30 | include/exec: Introduce fpst alias in helper-head.h.inc |
31 | |||
32 | Roman Artemev (1): | ||
33 | tcg/riscv: Fix StoreStore barrier generation | ||
34 | |||
35 | include/tcg/tcg-temp-internal.h | 6 ++++++ | ||
36 | accel/tcg/plugin-gen.c | 2 +- | ||
37 | target/sparc/win_helper.c | 26 ++++++++------------------ | ||
38 | tcg/tcg.c | 5 ++++- | ||
39 | include/exec/helper-head.h.inc | 3 +++ | ||
40 | tcg/riscv/tcg-target.c.inc | 2 +- | ||
41 | 6 files changed, 23 insertions(+), 21 deletions(-) | ||
42 | diff view generated by jsdifflib |
1 | We copied the data from the general register input to the | 1 | When allocating new temps during tcg_optimize, do not re-use |
---|---|---|---|
2 | vector register output, but have not yet replicated it. | 2 | any EBB temps that were used within the TB. We do not have |
3 | We intended to fall through into the vector-vector case, | 3 | any idea of the span of the TB in which the temp was live. |
4 | but failed to redirect the input register. | ||
5 | 4 | ||
6 | This is caught by an assertion failure in tcg_out_insn_VRIc, | 5 | Introduce tcg_temp_ebb_reset_freed and use before tcg_optimize, |
7 | which diagnosed the incorrect register class. | 6 | as well as replacing the equivalent in plugin_gen_inject and |
7 | tcg_func_start. | ||
8 | 8 | ||
9 | Cc: qemu-stable@nongnu.org | ||
10 | Fixes: fb04ab7ddd8 ("tcg/optimize: Lower TCG_COND_TST{EQ,NE} if unsupported") | ||
11 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2711 | ||
12 | Reported-by: wannacu <wannacu2049@gmail.com> | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 13 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
14 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | ||
15 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
10 | --- | 16 | --- |
11 | tcg/s390x/tcg-target.c.inc | 1 + | 17 | include/tcg/tcg-temp-internal.h | 6 ++++++ |
12 | 1 file changed, 1 insertion(+) | 18 | accel/tcg/plugin-gen.c | 2 +- |
19 | tcg/tcg.c | 5 ++++- | ||
20 | 3 files changed, 11 insertions(+), 2 deletions(-) | ||
13 | 21 | ||
14 | diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc | 22 | diff --git a/include/tcg/tcg-temp-internal.h b/include/tcg/tcg-temp-internal.h |
15 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/tcg/s390x/tcg-target.c.inc | 24 | --- a/include/tcg/tcg-temp-internal.h |
17 | +++ b/tcg/s390x/tcg-target.c.inc | 25 | +++ b/include/tcg/tcg-temp-internal.h |
18 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, | 26 | @@ -XXX,XX +XXX,XX @@ TCGv_i64 tcg_temp_ebb_new_i64(void); |
19 | if (vece == MO_64) { | 27 | TCGv_ptr tcg_temp_ebb_new_ptr(void); |
20 | return true; | 28 | TCGv_i128 tcg_temp_ebb_new_i128(void); |
21 | } | 29 | |
22 | + src = dst; | 30 | +/* Forget all freed EBB temps, so that new allocations produce new temps. */ |
31 | +static inline void tcg_temp_ebb_reset_freed(TCGContext *s) | ||
32 | +{ | ||
33 | + memset(s->free_temps, 0, sizeof(s->free_temps)); | ||
34 | +} | ||
35 | + | ||
36 | #endif /* TCG_TEMP_FREE_H */ | ||
37 | diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/accel/tcg/plugin-gen.c | ||
40 | +++ b/accel/tcg/plugin-gen.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static void plugin_gen_inject(struct qemu_plugin_tb *plugin_tb) | ||
42 | * that might be live within the existing opcode stream. | ||
43 | * The simplest solution is to release them all and create new. | ||
44 | */ | ||
45 | - memset(tcg_ctx->free_temps, 0, sizeof(tcg_ctx->free_temps)); | ||
46 | + tcg_temp_ebb_reset_freed(tcg_ctx); | ||
47 | |||
48 | QTAILQ_FOREACH_SAFE(op, &tcg_ctx->ops, link, next) { | ||
49 | switch (op->opc) { | ||
50 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
51 | index XXXXXXX..XXXXXXX 100644 | ||
52 | --- a/tcg/tcg.c | ||
53 | +++ b/tcg/tcg.c | ||
54 | @@ -XXX,XX +XXX,XX @@ void tcg_func_start(TCGContext *s) | ||
55 | s->nb_temps = s->nb_globals; | ||
56 | |||
57 | /* No temps have been previously allocated for size or locality. */ | ||
58 | - memset(s->free_temps, 0, sizeof(s->free_temps)); | ||
59 | + tcg_temp_ebb_reset_freed(s); | ||
60 | |||
61 | /* No constant temps have been previously allocated. */ | ||
62 | for (int i = 0; i < TCG_TYPE_COUNT; ++i) { | ||
63 | @@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) | ||
23 | } | 64 | } |
24 | 65 | #endif | |
25 | /* | 66 | |
67 | + /* Do not reuse any EBB that may be allocated within the TB. */ | ||
68 | + tcg_temp_ebb_reset_freed(s); | ||
69 | + | ||
70 | tcg_optimize(s); | ||
71 | |||
72 | reachable_code_pass(s); | ||
26 | -- | 73 | -- |
27 | 2.25.1 | 74 | 2.43.0 |
75 | |||
76 | diff view generated by jsdifflib |
1 | The operands are output in the wrong order: the tcg selector | 1 | From: Roman Artemev <roman.artemev@syntacore.com> |
---|---|---|---|
2 | argument is first, whereas the s390x selector argument is last. | ||
3 | 2 | ||
4 | Tested-by: Thomas Huth <thuth@redhat.com> | 3 | On RISC-V, a StoreStore barrier corresponds to |
5 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/898 | 4 | `fence w, w`, not `fence r, r`. |
6 | Fixes: 9bca986df88 ("tcg/s390x: Implement TCG_TARGET_HAS_bitsel_vec") | 5 | |
6 | Cc: qemu-stable@nongnu.org | ||
7 | Fixes: efbea94c76b ("tcg/riscv: Add slowpath load and store instructions") | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Signed-off-by: Denis Tomashev <denis.tomashev@syntacore.com> | ||
10 | Signed-off-by: Roman Artemev <roman.artemev@syntacore.com> | ||
11 | Message-ID: <e2f2131e294a49e79959d4fa9ec02cf4@syntacore.com> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 12 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | --- | 13 | --- |
9 | tcg/s390x/tcg-target.c.inc | 2 +- | 14 | tcg/riscv/tcg-target.c.inc | 2 +- |
10 | 1 file changed, 1 insertion(+), 1 deletion(-) | 15 | 1 file changed, 1 insertion(+), 1 deletion(-) |
11 | 16 | ||
12 | diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc | 17 | diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc |
13 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/tcg/s390x/tcg-target.c.inc | 19 | --- a/tcg/riscv/tcg-target.c.inc |
15 | +++ b/tcg/s390x/tcg-target.c.inc | 20 | +++ b/tcg/riscv/tcg-target.c.inc |
16 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, | 21 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) |
17 | break; | 22 | insn |= 0x02100000; |
18 | 23 | } | |
19 | case INDEX_op_bitsel_vec: | 24 | if (a0 & TCG_MO_ST_ST) { |
20 | - tcg_out_insn(s, VRRe, VSEL, a0, a1, a2, args[3]); | 25 | - insn |= 0x02200000; |
21 | + tcg_out_insn(s, VRRe, VSEL, a0, a2, args[3], a1); | 26 | + insn |= 0x01100000; |
22 | break; | 27 | } |
23 | 28 | tcg_out32(s, insn); | |
24 | case INDEX_op_cmp_vec: | 29 | } |
25 | -- | 30 | -- |
26 | 2.25.1 | 31 | 2.43.0 | diff view generated by jsdifflib |
1 | The LDRD (register) instruction is UNPREDICTABLE if the Rm register | 1 | This allows targets to declare that the helper requires a |
---|---|---|---|
2 | is the same as either Rt or Rt+1 (the two registers being loaded to). | 2 | float_status pointer instead of a generic void pointer. |
3 | We weren't making sure we avoided this, with the result that on some | ||
4 | host CPUs like the Cortex-A7 we would get a SIGILL because the CPU | ||
5 | chooses to UNDEF for this particular UNPREDICTABLE case. | ||
6 | 3 | ||
7 | Since we've already checked that datalo is aligned, we can simplify | 4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
8 | the test vs the Rm operand by aligning it before comparison. Check | ||
9 | for the two orderings before falling back to two ldr instructions. | ||
10 | |||
11 | We don't bother to do anything similar for tcg_out_ldrd_rwb(), | ||
12 | because it is only used in tcg_out_tlb_read() with a fixed set of | ||
13 | registers which don't overlap. | ||
14 | |||
15 | There is no equivalent UNPREDICTABLE case for STRD. | ||
16 | |||
17 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
18 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/896 | ||
19 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
20 | --- | 6 | --- |
21 | tcg/arm/tcg-target.c.inc | 17 +++++++++++++++-- | 7 | include/exec/helper-head.h.inc | 3 +++ |
22 | 1 file changed, 15 insertions(+), 2 deletions(-) | 8 | 1 file changed, 3 insertions(+) |
23 | 9 | ||
24 | diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc | 10 | diff --git a/include/exec/helper-head.h.inc b/include/exec/helper-head.h.inc |
25 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/tcg/arm/tcg-target.c.inc | 12 | --- a/include/exec/helper-head.h.inc |
27 | +++ b/tcg/arm/tcg-target.c.inc | 13 | +++ b/include/exec/helper-head.h.inc |
28 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, | 14 | @@ -XXX,XX +XXX,XX @@ |
29 | /* LDRD requires alignment; double-check that. */ | 15 | #define dh_alias_ptr ptr |
30 | if (get_alignment_bits(opc) >= MO_64 | 16 | #define dh_alias_cptr ptr |
31 | && (datalo & 1) == 0 && datahi == datalo + 1) { | 17 | #define dh_alias_env ptr |
32 | - tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend); | 18 | +#define dh_alias_fpst ptr |
33 | - } else if (scratch_addend) { | 19 | #define dh_alias_void void |
34 | + /* | 20 | #define dh_alias_noreturn noreturn |
35 | + * Rm (the second address op) must not overlap Rt or Rt + 1. | 21 | #define dh_alias(t) glue(dh_alias_, t) |
36 | + * Since datalo is aligned, we can simplify the test via alignment. | 22 | @@ -XXX,XX +XXX,XX @@ |
37 | + * Flip the two address arguments if that works. | 23 | #define dh_ctype_ptr void * |
38 | + */ | 24 | #define dh_ctype_cptr const void * |
39 | + if ((addend & ~1) != datalo) { | 25 | #define dh_ctype_env CPUArchState * |
40 | + tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend); | 26 | +#define dh_ctype_fpst float_status * |
41 | + break; | 27 | #define dh_ctype_void void |
42 | + } | 28 | #define dh_ctype_noreturn G_NORETURN void |
43 | + if ((addrlo & ~1) != datalo) { | 29 | #define dh_ctype(t) dh_ctype_##t |
44 | + tcg_out_ldrd_r(s, COND_AL, datalo, addend, addrlo); | 30 | @@ -XXX,XX +XXX,XX @@ |
45 | + break; | 31 | #define dh_typecode_f64 dh_typecode_i64 |
46 | + } | 32 | #define dh_typecode_cptr dh_typecode_ptr |
47 | + } | 33 | #define dh_typecode_env dh_typecode_ptr |
48 | + if (scratch_addend) { | 34 | +#define dh_typecode_fpst dh_typecode_ptr |
49 | tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo); | 35 | #define dh_typecode(t) dh_typecode_##t |
50 | tcg_out_ld32_12(s, COND_AL, datahi, addend, 4); | 36 | |
51 | } else { | 37 | #define dh_callflag_i32 0 |
52 | -- | 38 | -- |
53 | 2.25.1 | 39 | 2.43.0 |
54 | 40 | ||
55 | 41 | diff view generated by jsdifflib |
1 | The immediate operands to VGM were in the wrong order, | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | producing an inverse mask. | ||
3 | 2 | ||
3 | Rather than manually copying each register, use | ||
4 | the libc memcpy(), which is well optimized nowadays. | ||
5 | |||
6 | Suggested-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | ||
7 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
10 | Message-ID: <20241205205418.67613-1-philmd@linaro.org> | ||
4 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
5 | --- | 12 | --- |
6 | tcg/s390x/tcg-target.c.inc | 4 ++-- | 13 | target/sparc/win_helper.c | 26 ++++++++------------------ |
7 | 1 file changed, 2 insertions(+), 2 deletions(-) | 14 | 1 file changed, 8 insertions(+), 18 deletions(-) |
8 | 15 | ||
9 | diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc | 16 | diff --git a/target/sparc/win_helper.c b/target/sparc/win_helper.c |
10 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/tcg/s390x/tcg-target.c.inc | 18 | --- a/target/sparc/win_helper.c |
12 | +++ b/tcg/s390x/tcg-target.c.inc | 19 | +++ b/target/sparc/win_helper.c |
13 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, | 20 | @@ -XXX,XX +XXX,XX @@ |
14 | msb = clz32(val); | 21 | #include "exec/helper-proto.h" |
15 | lsb = 31 - ctz32(val); | 22 | #include "trace.h" |
16 | } | 23 | |
17 | - tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_32); | 24 | -static inline void memcpy32(target_ulong *dst, const target_ulong *src) |
18 | + tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_32); | 25 | -{ |
19 | return; | 26 | - dst[0] = src[0]; |
20 | } | 27 | - dst[1] = src[1]; |
28 | - dst[2] = src[2]; | ||
29 | - dst[3] = src[3]; | ||
30 | - dst[4] = src[4]; | ||
31 | - dst[5] = src[5]; | ||
32 | - dst[6] = src[6]; | ||
33 | - dst[7] = src[7]; | ||
34 | -} | ||
35 | - | ||
36 | void cpu_set_cwp(CPUSPARCState *env, int new_cwp) | ||
37 | { | ||
38 | /* put the modified wrap registers at their proper location */ | ||
39 | if (env->cwp == env->nwindows - 1) { | ||
40 | - memcpy32(env->regbase, env->regbase + env->nwindows * 16); | ||
41 | + memcpy(env->regbase, env->regbase + env->nwindows * 16, | ||
42 | + sizeof(env->gregs)); | ||
43 | } | ||
44 | env->cwp = new_cwp; | ||
45 | |||
46 | /* put the wrap registers at their temporary location */ | ||
47 | if (new_cwp == env->nwindows - 1) { | ||
48 | - memcpy32(env->regbase + env->nwindows * 16, env->regbase); | ||
49 | + memcpy(env->regbase + env->nwindows * 16, env->regbase, | ||
50 | + sizeof(env->gregs)); | ||
51 | } | ||
52 | env->regwptr = env->regbase + (new_cwp * 16); | ||
53 | } | ||
54 | @@ -XXX,XX +XXX,XX @@ void cpu_gl_switch_gregs(CPUSPARCState *env, uint32_t new_gl) | ||
55 | dst = get_gl_gregset(env, env->gl); | ||
56 | |||
57 | if (src != dst) { | ||
58 | - memcpy32(dst, env->gregs); | ||
59 | - memcpy32(env->gregs, src); | ||
60 | + memcpy(dst, env->gregs, sizeof(env->gregs)); | ||
61 | + memcpy(env->gregs, src, sizeof(env->gregs)); | ||
62 | } | ||
63 | } | ||
64 | |||
65 | @@ -XXX,XX +XXX,XX @@ void cpu_change_pstate(CPUSPARCState *env, uint32_t new_pstate) | ||
66 | /* Switch global register bank */ | ||
67 | src = get_gregset(env, new_pstate_regs); | ||
68 | dst = get_gregset(env, pstate_regs); | ||
69 | - memcpy32(dst, env->gregs); | ||
70 | - memcpy32(env->gregs, src); | ||
71 | + memcpy(dst, env->gregs, sizeof(env->gregs)); | ||
72 | + memcpy(env->gregs, src, sizeof(env->gregs)); | ||
21 | } else { | 73 | } else { |
22 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, | 74 | trace_win_helper_no_switch_pstate(new_pstate_regs); |
23 | msb = clz64(val); | ||
24 | lsb = 63 - ctz64(val); | ||
25 | } | ||
26 | - tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_64); | ||
27 | + tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_64); | ||
28 | return; | ||
29 | } | ||
30 | } | 75 | } |
31 | -- | 76 | -- |
32 | 2.25.1 | 77 | 2.43.0 |
78 | |||
79 | diff view generated by jsdifflib |