1
Pretty small still, but there are two patches that ought
1
V2 replaces the tcg const temp overflow patch.
2
to get backported to stable, so no point in delaying.
2
3
3
4
r~
4
r~
5
5
6
The following changes since commit a5ba0a7e4e150d1350a041f0d0ef9ca6c8d7c307:
7
6
8
Merge tag 'pull-aspeed-20241211' of https://github.com/legoater/qemu into staging (2024-12-11 15:16:47 +0000)
7
The following changes since commit 0e32462630687a18039464511bd0447ada5709c3:
8
9
Merge remote-tracking branch 'remotes/vivier2/tags/linux-user-for-6.0-pull-request' into staging (2021-01-22 10:35:55 +0000)
9
10
10
are available in the Git repository at:
11
are available in the Git repository at:
11
12
12
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20241212
13
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20210124
13
14
14
for you to fetch changes up to 7ac87b14a92234b6a89b701b4043ad6cf8bdcccf:
15
for you to fetch changes up to ae30e86661b0f48562cd95918d37cbeec5d02262:
15
16
16
target/sparc: Use memcpy() and remove memcpy32() (2024-12-12 14:28:38 -0600)
17
tcg: Restart code generation when we run out of temps (2021-01-24 08:03:27 -1000)
17
18
18
----------------------------------------------------------------
19
----------------------------------------------------------------
19
tcg: Reset free_temps before tcg_optimize
20
Fix tcg constant temp overflow.
20
tcg/riscv: Fix StoreStore barrier generation
21
Fix running during atomic single-step.
21
include/exec: Introduce fpst alias in helper-head.h.inc
22
Partial support for apple silicon.
22
target/sparc: Use memcpy() and remove memcpy32()
23
Cleanups for accel/tcg.
23
24
24
----------------------------------------------------------------
25
----------------------------------------------------------------
25
Philippe Mathieu-Daudé (1):
26
Douglas Crosher (1):
26
target/sparc: Use memcpy() and remove memcpy32()
27
tcg: update the cpu running flag in cpu_exec_step_atomic
27
28
28
Richard Henderson (2):
29
Philippe Mathieu-Daudé (4):
29
tcg: Reset free_temps before tcg_optimize
30
accel/tcg: Make cpu_gen_init() static
30
include/exec: Introduce fpst alias in helper-head.h.inc
31
accel/tcg: Restrict tb_gen_code() from other accelerators
32
accel/tcg: Declare missing cpu_loop_exit*() stubs
33
accel/tcg: Restrict cpu_io_recompile() from other accelerators
31
34
32
Roman Artemev (1):
35
Richard Henderson (4):
33
tcg/riscv: Fix StoreStore barrier generation
36
qemu/compiler: Split out qemu_build_not_reached_always
37
tcg: Optimize inline dup_const for MO_64
38
accel/tcg: Move tb_flush_jmp_cache() to cputlb.c
39
tcg: Restart code generation when we run out of temps
34
40
35
include/tcg/tcg-temp-internal.h | 6 ++++++
41
Roman Bolshakov (1):
36
accel/tcg/plugin-gen.c | 2 +-
42
tcg: Toggle page execution for Apple Silicon
37
target/sparc/win_helper.c | 26 ++++++++------------------
38
tcg/tcg.c | 5 ++++-
39
include/exec/helper-head.h.inc | 3 +++
40
tcg/riscv/tcg-target.c.inc | 2 +-
41
6 files changed, 23 insertions(+), 21 deletions(-)
42
43
44
accel/tcg/internal.h | 20 ++++++++++++++++++++
45
include/exec/exec-all.h | 11 -----------
46
include/qemu/compiler.h | 5 +++--
47
include/qemu/osdep.h | 28 ++++++++++++++++++++++++++++
48
include/tcg/tcg.h | 6 +++++-
49
accel/stubs/tcg-stub.c | 10 ++++++++++
50
accel/tcg/cpu-exec.c | 7 +++++++
51
accel/tcg/cputlb.c | 19 +++++++++++++++++++
52
accel/tcg/translate-all.c | 38 +++++++++++++++++++-------------------
53
tcg/tcg.c | 12 +++++++++---
54
10 files changed, 120 insertions(+), 36 deletions(-)
55
create mode 100644 accel/tcg/internal.h
56
diff view generated by jsdifflib
1
When allocating new temps during tcg_optimize, do not re-use
1
Some large translation blocks can generate so many unique
2
any EBB temps that were used within the TB. We do not have
2
constants that we run out of temps to hold them. In this
3
any idea of the span of the TB in which the temp was live.
3
case, longjmp back to the start of code generation and
4
restart with a smaller translation block.
4
5
5
Introduce tcg_temp_ebb_reset_freed and use before tcg_optimize,
6
Buglink: https://bugs.launchpad.net/bugs/1912065
6
as well as replacing the equivalent in plugin_gen_inject and
7
Tested-by: BALATON Zoltan <balaton@eik.bme.hu>
7
tcg_func_start.
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
include/tcg/tcg.h | 3 +++
12
accel/tcg/translate-all.c | 15 ++++++++++++++-
13
tcg/tcg.c | 11 ++++++++---
14
3 files changed, 25 insertions(+), 4 deletions(-)
8
15
9
Cc: qemu-stable@nongnu.org
16
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
10
Fixes: fb04ab7ddd8 ("tcg/optimize: Lower TCG_COND_TST{EQ,NE} if unsupported")
11
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2711
12
Reported-by: wannacu <wannacu2049@gmail.com>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
15
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
16
---
17
include/tcg/tcg-temp-internal.h | 6 ++++++
18
accel/tcg/plugin-gen.c | 2 +-
19
tcg/tcg.c | 5 ++++-
20
3 files changed, 11 insertions(+), 2 deletions(-)
21
22
diff --git a/include/tcg/tcg-temp-internal.h b/include/tcg/tcg-temp-internal.h
23
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
24
--- a/include/tcg/tcg-temp-internal.h
18
--- a/include/tcg/tcg.h
25
+++ b/include/tcg/tcg-temp-internal.h
19
+++ b/include/tcg/tcg.h
26
@@ -XXX,XX +XXX,XX @@ TCGv_i64 tcg_temp_ebb_new_i64(void);
20
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
27
TCGv_ptr tcg_temp_ebb_new_ptr(void);
21
28
TCGv_i128 tcg_temp_ebb_new_i128(void);
22
uint16_t gen_insn_end_off[TCG_MAX_INSNS];
29
23
target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
30
+/* Forget all freed EBB temps, so that new allocations produce new temps. */
31
+static inline void tcg_temp_ebb_reset_freed(TCGContext *s)
32
+{
33
+ memset(s->free_temps, 0, sizeof(s->free_temps));
34
+}
35
+
24
+
36
#endif /* TCG_TEMP_FREE_H */
25
+ /* Exit to translator on overflow. */
37
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
26
+ sigjmp_buf jmp_trans;
27
};
28
29
static inline bool temp_readonly(TCGTemp *ts)
30
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
38
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
39
--- a/accel/tcg/plugin-gen.c
32
--- a/accel/tcg/translate-all.c
40
+++ b/accel/tcg/plugin-gen.c
33
+++ b/accel/tcg/translate-all.c
41
@@ -XXX,XX +XXX,XX @@ static void plugin_gen_inject(struct qemu_plugin_tb *plugin_tb)
34
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
42
* that might be live within the existing opcode stream.
35
ti = profile_getclock();
43
* The simplest solution is to release them all and create new.
36
#endif
44
*/
37
45
- memset(tcg_ctx->free_temps, 0, sizeof(tcg_ctx->free_temps));
38
+ gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0);
46
+ tcg_temp_ebb_reset_freed(tcg_ctx);
39
+ if (unlikely(gen_code_size != 0)) {
47
40
+ goto error_return;
48
QTAILQ_FOREACH_SAFE(op, &tcg_ctx->ops, link, next) {
41
+ }
49
switch (op->opc) {
42
+
43
tcg_func_start(tcg_ctx);
44
45
tcg_ctx->cpu = env_cpu(env);
46
gen_intermediate_code(cpu, tb, max_insns);
47
tcg_ctx->cpu = NULL;
48
+ max_insns = tb->icount;
49
50
trace_translate_block(tb, tb->pc, tb->tc.ptr);
51
52
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
53
54
gen_code_size = tcg_gen_code(tcg_ctx, tb);
55
if (unlikely(gen_code_size < 0)) {
56
+ error_return:
57
switch (gen_code_size) {
58
case -1:
59
/*
60
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
61
* flush the TBs, allocate a new TB, re-initialize it per
62
* above, and re-do the actual code generation.
63
*/
64
+ qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
65
+ "Restarting code generation for "
66
+ "code_gen_buffer overflow\n");
67
goto buffer_overflow;
68
69
case -2:
70
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
71
* Try again with half as many insns as we attempted this time.
72
* If a single insn overflows, there's a bug somewhere...
73
*/
74
- max_insns = tb->icount;
75
assert(max_insns > 1);
76
max_insns /= 2;
77
+ qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
78
+ "Restarting code generation with "
79
+ "smaller translation block (max %d insns)\n",
80
+ max_insns);
81
goto tb_overflow;
82
83
default:
50
diff --git a/tcg/tcg.c b/tcg/tcg.c
84
diff --git a/tcg/tcg.c b/tcg/tcg.c
51
index XXXXXXX..XXXXXXX 100644
85
index XXXXXXX..XXXXXXX 100644
52
--- a/tcg/tcg.c
86
--- a/tcg/tcg.c
53
+++ b/tcg/tcg.c
87
+++ b/tcg/tcg.c
54
@@ -XXX,XX +XXX,XX @@ void tcg_func_start(TCGContext *s)
88
@@ -XXX,XX +XXX,XX @@ void tcg_func_start(TCGContext *s)
55
s->nb_temps = s->nb_globals;
89
QSIMPLEQ_INIT(&s->labels);
56
90
}
57
/* No temps have been previously allocated for size or locality. */
91
58
- memset(s->free_temps, 0, sizeof(s->free_temps));
92
-static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
59
+ tcg_temp_ebb_reset_freed(s);
93
+static TCGTemp *tcg_temp_alloc(TCGContext *s)
60
94
{
61
/* No constant temps have been previously allocated. */
95
int n = s->nb_temps++;
62
for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
96
- tcg_debug_assert(n < TCG_MAX_TEMPS);
63
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
64
}
65
#endif
66
67
+ /* Do not reuse any EBB that may be allocated within the TB. */
68
+ tcg_temp_ebb_reset_freed(s);
69
+
97
+
70
tcg_optimize(s);
98
+ if (n >= TCG_MAX_TEMPS) {
71
99
+ /* Signal overflow, starting over with fewer guest insns. */
72
reachable_code_pass(s);
100
+ siglongjmp(s->jmp_trans, -2);
101
+ }
102
return memset(&s->temps[n], 0, sizeof(TCGTemp));
103
}
104
105
-static inline TCGTemp *tcg_global_alloc(TCGContext *s)
106
+static TCGTemp *tcg_global_alloc(TCGContext *s)
107
{
108
TCGTemp *ts;
109
110
tcg_debug_assert(s->nb_globals == s->nb_temps);
111
+ tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
112
s->nb_globals++;
113
ts = tcg_temp_alloc(s);
114
ts->kind = TEMP_GLOBAL;
73
--
115
--
74
2.43.0
116
2.25.1
75
117
76
118
diff view generated by jsdifflib
Deleted patch
1
From: Roman Artemev <roman.artemev@syntacore.com>
2
1
3
On RISC-V, a StoreStore barrier corresponds to
4
`fence w, w`, not `fence r, r`.
5
6
Cc: qemu-stable@nongnu.org
7
Fixes: efbea94c76b ("tcg/riscv: Add slowpath load and store instructions")
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Denis Tomashev <denis.tomashev@syntacore.com>
10
Signed-off-by: Roman Artemev <roman.artemev@syntacore.com>
11
Message-ID: <e2f2131e294a49e79959d4fa9ec02cf4@syntacore.com>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
14
tcg/riscv/tcg-target.c.inc | 2 +-
15
1 file changed, 1 insertion(+), 1 deletion(-)
16
17
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
18
index XXXXXXX..XXXXXXX 100644
19
--- a/tcg/riscv/tcg-target.c.inc
20
+++ b/tcg/riscv/tcg-target.c.inc
21
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
22
insn |= 0x02100000;
23
}
24
if (a0 & TCG_MO_ST_ST) {
25
- insn |= 0x02200000;
26
+ insn |= 0x01100000;
27
}
28
tcg_out32(s, insn);
29
}
30
--
31
2.43.0
diff view generated by jsdifflib
Deleted patch
1
This allows targets to declare that the helper requires a
2
float_status pointer instead of a generic void pointer.
3
1
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
include/exec/helper-head.h.inc | 3 +++
8
1 file changed, 3 insertions(+)
9
10
diff --git a/include/exec/helper-head.h.inc b/include/exec/helper-head.h.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/include/exec/helper-head.h.inc
13
+++ b/include/exec/helper-head.h.inc
14
@@ -XXX,XX +XXX,XX @@
15
#define dh_alias_ptr ptr
16
#define dh_alias_cptr ptr
17
#define dh_alias_env ptr
18
+#define dh_alias_fpst ptr
19
#define dh_alias_void void
20
#define dh_alias_noreturn noreturn
21
#define dh_alias(t) glue(dh_alias_, t)
22
@@ -XXX,XX +XXX,XX @@
23
#define dh_ctype_ptr void *
24
#define dh_ctype_cptr const void *
25
#define dh_ctype_env CPUArchState *
26
+#define dh_ctype_fpst float_status *
27
#define dh_ctype_void void
28
#define dh_ctype_noreturn G_NORETURN void
29
#define dh_ctype(t) dh_ctype_##t
30
@@ -XXX,XX +XXX,XX @@
31
#define dh_typecode_f64 dh_typecode_i64
32
#define dh_typecode_cptr dh_typecode_ptr
33
#define dh_typecode_env dh_typecode_ptr
34
+#define dh_typecode_fpst dh_typecode_ptr
35
#define dh_typecode(t) dh_typecode_##t
36
37
#define dh_callflag_i32 0
38
--
39
2.43.0
40
41
diff view generated by jsdifflib
Deleted patch
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
2
1
3
Rather than manually copying each register, use
4
the libc memcpy(), which is well optimized nowadays.
5
6
Suggested-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
7
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Message-ID: <20241205205418.67613-1-philmd@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
13
target/sparc/win_helper.c | 26 ++++++++------------------
14
1 file changed, 8 insertions(+), 18 deletions(-)
15
16
diff --git a/target/sparc/win_helper.c b/target/sparc/win_helper.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/target/sparc/win_helper.c
19
+++ b/target/sparc/win_helper.c
20
@@ -XXX,XX +XXX,XX @@
21
#include "exec/helper-proto.h"
22
#include "trace.h"
23
24
-static inline void memcpy32(target_ulong *dst, const target_ulong *src)
25
-{
26
- dst[0] = src[0];
27
- dst[1] = src[1];
28
- dst[2] = src[2];
29
- dst[3] = src[3];
30
- dst[4] = src[4];
31
- dst[5] = src[5];
32
- dst[6] = src[6];
33
- dst[7] = src[7];
34
-}
35
-
36
void cpu_set_cwp(CPUSPARCState *env, int new_cwp)
37
{
38
/* put the modified wrap registers at their proper location */
39
if (env->cwp == env->nwindows - 1) {
40
- memcpy32(env->regbase, env->regbase + env->nwindows * 16);
41
+ memcpy(env->regbase, env->regbase + env->nwindows * 16,
42
+ sizeof(env->gregs));
43
}
44
env->cwp = new_cwp;
45
46
/* put the wrap registers at their temporary location */
47
if (new_cwp == env->nwindows - 1) {
48
- memcpy32(env->regbase + env->nwindows * 16, env->regbase);
49
+ memcpy(env->regbase + env->nwindows * 16, env->regbase,
50
+ sizeof(env->gregs));
51
}
52
env->regwptr = env->regbase + (new_cwp * 16);
53
}
54
@@ -XXX,XX +XXX,XX @@ void cpu_gl_switch_gregs(CPUSPARCState *env, uint32_t new_gl)
55
dst = get_gl_gregset(env, env->gl);
56
57
if (src != dst) {
58
- memcpy32(dst, env->gregs);
59
- memcpy32(env->gregs, src);
60
+ memcpy(dst, env->gregs, sizeof(env->gregs));
61
+ memcpy(env->gregs, src, sizeof(env->gregs));
62
}
63
}
64
65
@@ -XXX,XX +XXX,XX @@ void cpu_change_pstate(CPUSPARCState *env, uint32_t new_pstate)
66
/* Switch global register bank */
67
src = get_gregset(env, new_pstate_regs);
68
dst = get_gregset(env, pstate_regs);
69
- memcpy32(dst, env->gregs);
70
- memcpy32(env->gregs, src);
71
+ memcpy(dst, env->gregs, sizeof(env->gregs));
72
+ memcpy(env->gregs, src, sizeof(env->gregs));
73
} else {
74
trace_win_helper_no_switch_pstate(new_pstate_regs);
75
}
76
--
77
2.43.0
78
79
diff view generated by jsdifflib