1 | Pretty small still, but there are two patches that ought | 1 | V2 replaces the tcg const temp overflow patch. |
---|---|---|---|
2 | to get backported to stable, so no point in delaying. | 2 | |
3 | 3 | ||
4 | r~ | 4 | r~ |
5 | 5 | ||
6 | The following changes since commit a5ba0a7e4e150d1350a041f0d0ef9ca6c8d7c307: | ||
7 | 6 | ||
8 | Merge tag 'pull-aspeed-20241211' of https://github.com/legoater/qemu into staging (2024-12-11 15:16:47 +0000) | 7 | The following changes since commit 0e32462630687a18039464511bd0447ada5709c3: |
8 | |||
9 | Merge remote-tracking branch 'remotes/vivier2/tags/linux-user-for-6.0-pull-request' into staging (2021-01-22 10:35:55 +0000) | ||
9 | 10 | ||
10 | are available in the Git repository at: | 11 | are available in the Git repository at: |
11 | 12 | ||
12 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20241212 | 13 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20210124 |
13 | 14 | ||
14 | for you to fetch changes up to 7ac87b14a92234b6a89b701b4043ad6cf8bdcccf: | 15 | for you to fetch changes up to ae30e86661b0f48562cd95918d37cbeec5d02262: |
15 | 16 | ||
16 | target/sparc: Use memcpy() and remove memcpy32() (2024-12-12 14:28:38 -0600) | 17 | tcg: Restart code generation when we run out of temps (2021-01-24 08:03:27 -1000) |
17 | 18 | ||
18 | ---------------------------------------------------------------- | 19 | ---------------------------------------------------------------- |
19 | tcg: Reset free_temps before tcg_optimize | 20 | Fix tcg constant temp overflow. |
20 | tcg/riscv: Fix StoreStore barrier generation | 21 | Fix running during atomic single-step. |
21 | include/exec: Introduce fpst alias in helper-head.h.inc | 22 | Partial support for apple silicon. |
22 | target/sparc: Use memcpy() and remove memcpy32() | 23 | Cleanups for accel/tcg. |
23 | 24 | ||
24 | ---------------------------------------------------------------- | 25 | ---------------------------------------------------------------- |
25 | Philippe Mathieu-Daudé (1): | 26 | Douglas Crosher (1): |
26 | target/sparc: Use memcpy() and remove memcpy32() | 27 | tcg: update the cpu running flag in cpu_exec_step_atomic |
27 | 28 | ||
28 | Richard Henderson (2): | 29 | Philippe Mathieu-Daudé (4): |
29 | tcg: Reset free_temps before tcg_optimize | 30 | accel/tcg: Make cpu_gen_init() static |
30 | include/exec: Introduce fpst alias in helper-head.h.inc | 31 | accel/tcg: Restrict tb_gen_code() from other accelerators |
32 | accel/tcg: Declare missing cpu_loop_exit*() stubs | ||
33 | accel/tcg: Restrict cpu_io_recompile() from other accelerators | ||
31 | 34 | ||
32 | Roman Artemev (1): | 35 | Richard Henderson (4): |
33 | tcg/riscv: Fix StoreStore barrier generation | 36 | qemu/compiler: Split out qemu_build_not_reached_always |
37 | tcg: Optimize inline dup_const for MO_64 | ||
38 | accel/tcg: Move tb_flush_jmp_cache() to cputlb.c | ||
39 | tcg: Restart code generation when we run out of temps | ||
34 | 40 | ||
35 | include/tcg/tcg-temp-internal.h | 6 ++++++ | 41 | Roman Bolshakov (1): |
36 | accel/tcg/plugin-gen.c | 2 +- | 42 | tcg: Toggle page execution for Apple Silicon |
37 | target/sparc/win_helper.c | 26 ++++++++------------------ | ||
38 | tcg/tcg.c | 5 ++++- | ||
39 | include/exec/helper-head.h.inc | 3 +++ | ||
40 | tcg/riscv/tcg-target.c.inc | 2 +- | ||
41 | 6 files changed, 23 insertions(+), 21 deletions(-) | ||
42 | 43 | ||
44 | accel/tcg/internal.h | 20 ++++++++++++++++++++ | ||
45 | include/exec/exec-all.h | 11 ----------- | ||
46 | include/qemu/compiler.h | 5 +++-- | ||
47 | include/qemu/osdep.h | 28 ++++++++++++++++++++++++++++ | ||
48 | include/tcg/tcg.h | 6 +++++- | ||
49 | accel/stubs/tcg-stub.c | 10 ++++++++++ | ||
50 | accel/tcg/cpu-exec.c | 7 +++++++ | ||
51 | accel/tcg/cputlb.c | 19 +++++++++++++++++++ | ||
52 | accel/tcg/translate-all.c | 38 +++++++++++++++++++------------------- | ||
53 | tcg/tcg.c | 12 +++++++++--- | ||
54 | 10 files changed, 120 insertions(+), 36 deletions(-) | ||
55 | create mode 100644 accel/tcg/internal.h | ||
56 | diff view generated by jsdifflib |
1 | When allocating new temps during tcg_optimize, do not re-use | 1 | Some large translation blocks can generate so many unique |
---|---|---|---|
2 | any EBB temps that were used within the TB. We do not have | 2 | constants that we run out of temps to hold them. In this |
3 | any idea of the span of the TB in which the temp was live. | 3 | case, longjmp back to the start of code generation and |
4 | restart with a smaller translation block. | ||
4 | 5 | ||
5 | Introduce tcg_temp_ebb_reset_freed and use before tcg_optimize, | 6 | Buglink: https://bugs.launchpad.net/bugs/1912065 |
6 | as well as replacing the equivalent in plugin_gen_inject and | 7 | Tested-by: BALATON Zoltan <balaton@eik.bme.hu> |
7 | tcg_func_start. | 8 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> |
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | ||
11 | include/tcg/tcg.h | 3 +++ | ||
12 | accel/tcg/translate-all.c | 15 ++++++++++++++- | ||
13 | tcg/tcg.c | 11 ++++++++--- | ||
14 | 3 files changed, 25 insertions(+), 4 deletions(-) | ||
8 | 15 | ||
9 | Cc: qemu-stable@nongnu.org | 16 | diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h |
10 | Fixes: fb04ab7ddd8 ("tcg/optimize: Lower TCG_COND_TST{EQ,NE} if unsupported") | ||
11 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2711 | ||
12 | Reported-by: wannacu <wannacu2049@gmail.com> | ||
13 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | ||
15 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
16 | --- | ||
17 | include/tcg/tcg-temp-internal.h | 6 ++++++ | ||
18 | accel/tcg/plugin-gen.c | 2 +- | ||
19 | tcg/tcg.c | 5 ++++- | ||
20 | 3 files changed, 11 insertions(+), 2 deletions(-) | ||
21 | |||
22 | diff --git a/include/tcg/tcg-temp-internal.h b/include/tcg/tcg-temp-internal.h | ||
23 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
24 | --- a/include/tcg/tcg-temp-internal.h | 18 | --- a/include/tcg/tcg.h |
25 | +++ b/include/tcg/tcg-temp-internal.h | 19 | +++ b/include/tcg/tcg.h |
26 | @@ -XXX,XX +XXX,XX @@ TCGv_i64 tcg_temp_ebb_new_i64(void); | 20 | @@ -XXX,XX +XXX,XX @@ struct TCGContext { |
27 | TCGv_ptr tcg_temp_ebb_new_ptr(void); | 21 | |
28 | TCGv_i128 tcg_temp_ebb_new_i128(void); | 22 | uint16_t gen_insn_end_off[TCG_MAX_INSNS]; |
29 | 23 | target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS]; | |
30 | +/* Forget all freed EBB temps, so that new allocations produce new temps. */ | ||
31 | +static inline void tcg_temp_ebb_reset_freed(TCGContext *s) | ||
32 | +{ | ||
33 | + memset(s->free_temps, 0, sizeof(s->free_temps)); | ||
34 | +} | ||
35 | + | 24 | + |
36 | #endif /* TCG_TEMP_FREE_H */ | 25 | + /* Exit to translator on overflow. */ |
37 | diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c | 26 | + sigjmp_buf jmp_trans; |
27 | }; | ||
28 | |||
29 | static inline bool temp_readonly(TCGTemp *ts) | ||
30 | diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | 31 | index XXXXXXX..XXXXXXX 100644 |
39 | --- a/accel/tcg/plugin-gen.c | 32 | --- a/accel/tcg/translate-all.c |
40 | +++ b/accel/tcg/plugin-gen.c | 33 | +++ b/accel/tcg/translate-all.c |
41 | @@ -XXX,XX +XXX,XX @@ static void plugin_gen_inject(struct qemu_plugin_tb *plugin_tb) | 34 | @@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu, |
42 | * that might be live within the existing opcode stream. | 35 | ti = profile_getclock(); |
43 | * The simplest solution is to release them all and create new. | 36 | #endif |
44 | */ | 37 | |
45 | - memset(tcg_ctx->free_temps, 0, sizeof(tcg_ctx->free_temps)); | 38 | + gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0); |
46 | + tcg_temp_ebb_reset_freed(tcg_ctx); | 39 | + if (unlikely(gen_code_size != 0)) { |
47 | 40 | + goto error_return; | |
48 | QTAILQ_FOREACH_SAFE(op, &tcg_ctx->ops, link, next) { | 41 | + } |
49 | switch (op->opc) { | 42 | + |
43 | tcg_func_start(tcg_ctx); | ||
44 | |||
45 | tcg_ctx->cpu = env_cpu(env); | ||
46 | gen_intermediate_code(cpu, tb, max_insns); | ||
47 | tcg_ctx->cpu = NULL; | ||
48 | + max_insns = tb->icount; | ||
49 | |||
50 | trace_translate_block(tb, tb->pc, tb->tc.ptr); | ||
51 | |||
52 | @@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu, | ||
53 | |||
54 | gen_code_size = tcg_gen_code(tcg_ctx, tb); | ||
55 | if (unlikely(gen_code_size < 0)) { | ||
56 | + error_return: | ||
57 | switch (gen_code_size) { | ||
58 | case -1: | ||
59 | /* | ||
60 | @@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu, | ||
61 | * flush the TBs, allocate a new TB, re-initialize it per | ||
62 | * above, and re-do the actual code generation. | ||
63 | */ | ||
64 | + qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT, | ||
65 | + "Restarting code generation for " | ||
66 | + "code_gen_buffer overflow\n"); | ||
67 | goto buffer_overflow; | ||
68 | |||
69 | case -2: | ||
70 | @@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu, | ||
71 | * Try again with half as many insns as we attempted this time. | ||
72 | * If a single insn overflows, there's a bug somewhere... | ||
73 | */ | ||
74 | - max_insns = tb->icount; | ||
75 | assert(max_insns > 1); | ||
76 | max_insns /= 2; | ||
77 | + qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT, | ||
78 | + "Restarting code generation with " | ||
79 | + "smaller translation block (max %d insns)\n", | ||
80 | + max_insns); | ||
81 | goto tb_overflow; | ||
82 | |||
83 | default: | ||
50 | diff --git a/tcg/tcg.c b/tcg/tcg.c | 84 | diff --git a/tcg/tcg.c b/tcg/tcg.c |
51 | index XXXXXXX..XXXXXXX 100644 | 85 | index XXXXXXX..XXXXXXX 100644 |
52 | --- a/tcg/tcg.c | 86 | --- a/tcg/tcg.c |
53 | +++ b/tcg/tcg.c | 87 | +++ b/tcg/tcg.c |
54 | @@ -XXX,XX +XXX,XX @@ void tcg_func_start(TCGContext *s) | 88 | @@ -XXX,XX +XXX,XX @@ void tcg_func_start(TCGContext *s) |
55 | s->nb_temps = s->nb_globals; | 89 | QSIMPLEQ_INIT(&s->labels); |
56 | 90 | } | |
57 | /* No temps have been previously allocated for size or locality. */ | 91 | |
58 | - memset(s->free_temps, 0, sizeof(s->free_temps)); | 92 | -static inline TCGTemp *tcg_temp_alloc(TCGContext *s) |
59 | + tcg_temp_ebb_reset_freed(s); | 93 | +static TCGTemp *tcg_temp_alloc(TCGContext *s) |
60 | 94 | { | |
61 | /* No constant temps have been previously allocated. */ | 95 | int n = s->nb_temps++; |
62 | for (int i = 0; i < TCG_TYPE_COUNT; ++i) { | 96 | - tcg_debug_assert(n < TCG_MAX_TEMPS); |
63 | @@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) | ||
64 | } | ||
65 | #endif | ||
66 | |||
67 | + /* Do not reuse any EBB that may be allocated within the TB. */ | ||
68 | + tcg_temp_ebb_reset_freed(s); | ||
69 | + | 97 | + |
70 | tcg_optimize(s); | 98 | + if (n >= TCG_MAX_TEMPS) { |
71 | 99 | + /* Signal overflow, starting over with fewer guest insns. */ | |
72 | reachable_code_pass(s); | 100 | + siglongjmp(s->jmp_trans, -2); |
101 | + } | ||
102 | return memset(&s->temps[n], 0, sizeof(TCGTemp)); | ||
103 | } | ||
104 | |||
105 | -static inline TCGTemp *tcg_global_alloc(TCGContext *s) | ||
106 | +static TCGTemp *tcg_global_alloc(TCGContext *s) | ||
107 | { | ||
108 | TCGTemp *ts; | ||
109 | |||
110 | tcg_debug_assert(s->nb_globals == s->nb_temps); | ||
111 | + tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS); | ||
112 | s->nb_globals++; | ||
113 | ts = tcg_temp_alloc(s); | ||
114 | ts->kind = TEMP_GLOBAL; | ||
73 | -- | 115 | -- |
74 | 2.43.0 | 116 | 2.25.1 |
75 | 117 | ||
76 | 118 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Roman Artemev <roman.artemev@syntacore.com> | ||
2 | 1 | ||
3 | On RISC-V, a StoreStore barrier corresponds to | ||
4 | `fence w, w`, not `fence r, r`. | ||
5 | |||
6 | Cc: qemu-stable@nongnu.org | ||
7 | Fixes: efbea94c76b ("tcg/riscv: Add slowpath load and store instructions") | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Signed-off-by: Denis Tomashev <denis.tomashev@syntacore.com> | ||
10 | Signed-off-by: Roman Artemev <roman.artemev@syntacore.com> | ||
11 | Message-ID: <e2f2131e294a49e79959d4fa9ec02cf4@syntacore.com> | ||
12 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | --- | ||
14 | tcg/riscv/tcg-target.c.inc | 2 +- | ||
15 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
16 | |||
17 | diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/tcg/riscv/tcg-target.c.inc | ||
20 | +++ b/tcg/riscv/tcg-target.c.inc | ||
21 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) | ||
22 | insn |= 0x02100000; | ||
23 | } | ||
24 | if (a0 & TCG_MO_ST_ST) { | ||
25 | - insn |= 0x02200000; | ||
26 | + insn |= 0x01100000; | ||
27 | } | ||
28 | tcg_out32(s, insn); | ||
29 | } | ||
30 | -- | ||
31 | 2.43.0 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | This allows targets to declare that the helper requires a | ||
2 | float_status pointer instead of a generic void pointer. | ||
3 | 1 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | include/exec/helper-head.h.inc | 3 +++ | ||
8 | 1 file changed, 3 insertions(+) | ||
9 | |||
10 | diff --git a/include/exec/helper-head.h.inc b/include/exec/helper-head.h.inc | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/include/exec/helper-head.h.inc | ||
13 | +++ b/include/exec/helper-head.h.inc | ||
14 | @@ -XXX,XX +XXX,XX @@ | ||
15 | #define dh_alias_ptr ptr | ||
16 | #define dh_alias_cptr ptr | ||
17 | #define dh_alias_env ptr | ||
18 | +#define dh_alias_fpst ptr | ||
19 | #define dh_alias_void void | ||
20 | #define dh_alias_noreturn noreturn | ||
21 | #define dh_alias(t) glue(dh_alias_, t) | ||
22 | @@ -XXX,XX +XXX,XX @@ | ||
23 | #define dh_ctype_ptr void * | ||
24 | #define dh_ctype_cptr const void * | ||
25 | #define dh_ctype_env CPUArchState * | ||
26 | +#define dh_ctype_fpst float_status * | ||
27 | #define dh_ctype_void void | ||
28 | #define dh_ctype_noreturn G_NORETURN void | ||
29 | #define dh_ctype(t) dh_ctype_##t | ||
30 | @@ -XXX,XX +XXX,XX @@ | ||
31 | #define dh_typecode_f64 dh_typecode_i64 | ||
32 | #define dh_typecode_cptr dh_typecode_ptr | ||
33 | #define dh_typecode_env dh_typecode_ptr | ||
34 | +#define dh_typecode_fpst dh_typecode_ptr | ||
35 | #define dh_typecode(t) dh_typecode_##t | ||
36 | |||
37 | #define dh_callflag_i32 0 | ||
38 | -- | ||
39 | 2.43.0 | ||
40 | |||
41 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
2 | 1 | ||
3 | Rather than manually copying each register, use | ||
4 | the libc memcpy(), which is well optimized nowadays. | ||
5 | |||
6 | Suggested-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | ||
7 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
10 | Message-ID: <20241205205418.67613-1-philmd@linaro.org> | ||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | target/sparc/win_helper.c | 26 ++++++++------------------ | ||
14 | 1 file changed, 8 insertions(+), 18 deletions(-) | ||
15 | |||
16 | diff --git a/target/sparc/win_helper.c b/target/sparc/win_helper.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/target/sparc/win_helper.c | ||
19 | +++ b/target/sparc/win_helper.c | ||
20 | @@ -XXX,XX +XXX,XX @@ | ||
21 | #include "exec/helper-proto.h" | ||
22 | #include "trace.h" | ||
23 | |||
24 | -static inline void memcpy32(target_ulong *dst, const target_ulong *src) | ||
25 | -{ | ||
26 | - dst[0] = src[0]; | ||
27 | - dst[1] = src[1]; | ||
28 | - dst[2] = src[2]; | ||
29 | - dst[3] = src[3]; | ||
30 | - dst[4] = src[4]; | ||
31 | - dst[5] = src[5]; | ||
32 | - dst[6] = src[6]; | ||
33 | - dst[7] = src[7]; | ||
34 | -} | ||
35 | - | ||
36 | void cpu_set_cwp(CPUSPARCState *env, int new_cwp) | ||
37 | { | ||
38 | /* put the modified wrap registers at their proper location */ | ||
39 | if (env->cwp == env->nwindows - 1) { | ||
40 | - memcpy32(env->regbase, env->regbase + env->nwindows * 16); | ||
41 | + memcpy(env->regbase, env->regbase + env->nwindows * 16, | ||
42 | + sizeof(env->gregs)); | ||
43 | } | ||
44 | env->cwp = new_cwp; | ||
45 | |||
46 | /* put the wrap registers at their temporary location */ | ||
47 | if (new_cwp == env->nwindows - 1) { | ||
48 | - memcpy32(env->regbase + env->nwindows * 16, env->regbase); | ||
49 | + memcpy(env->regbase + env->nwindows * 16, env->regbase, | ||
50 | + sizeof(env->gregs)); | ||
51 | } | ||
52 | env->regwptr = env->regbase + (new_cwp * 16); | ||
53 | } | ||
54 | @@ -XXX,XX +XXX,XX @@ void cpu_gl_switch_gregs(CPUSPARCState *env, uint32_t new_gl) | ||
55 | dst = get_gl_gregset(env, env->gl); | ||
56 | |||
57 | if (src != dst) { | ||
58 | - memcpy32(dst, env->gregs); | ||
59 | - memcpy32(env->gregs, src); | ||
60 | + memcpy(dst, env->gregs, sizeof(env->gregs)); | ||
61 | + memcpy(env->gregs, src, sizeof(env->gregs)); | ||
62 | } | ||
63 | } | ||
64 | |||
65 | @@ -XXX,XX +XXX,XX @@ void cpu_change_pstate(CPUSPARCState *env, uint32_t new_pstate) | ||
66 | /* Switch global register bank */ | ||
67 | src = get_gregset(env, new_pstate_regs); | ||
68 | dst = get_gregset(env, pstate_regs); | ||
69 | - memcpy32(dst, env->gregs); | ||
70 | - memcpy32(env->gregs, src); | ||
71 | + memcpy(dst, env->gregs, sizeof(env->gregs)); | ||
72 | + memcpy(env->gregs, src, sizeof(env->gregs)); | ||
73 | } else { | ||
74 | trace_win_helper_no_switch_pstate(new_pstate_regs); | ||
75 | } | ||
76 | -- | ||
77 | 2.43.0 | ||
78 | |||
79 | diff view generated by jsdifflib |