1
Pretty small still, but there are two patches that ought
1
V2 fixes an error in patch 22 wrt MacOS.
2
to get backported to stable, so no point in delaying.
2
It's a shame we don't have public CI for that.
3
3
4
4
r~
5
r~
5
6
6
The following changes since commit a5ba0a7e4e150d1350a041f0d0ef9ca6c8d7c307:
7
7
8
Merge tag 'pull-aspeed-20241211' of https://github.com/legoater/qemu into staging (2024-12-11 15:16:47 +0000)
8
The following changes since commit 894fc4fd670aaf04a67dc7507739f914ff4bacf2:
9
10
Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging (2021-06-11 09:21:48 +0100)
9
11
10
are available in the Git repository at:
12
are available in the Git repository at:
11
13
12
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20241212
14
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20210613
13
15
14
for you to fetch changes up to 7ac87b14a92234b6a89b701b4043ad6cf8bdcccf:
16
for you to fetch changes up to a5a8b84772e13066c6c45f480cc5b5312bbde08e:
15
17
16
target/sparc: Use memcpy() and remove memcpy32() (2024-12-12 14:28:38 -0600)
18
docs/devel: Explain in more detail the TB chaining mechanisms (2021-06-13 17:42:40 -0700)
17
19
18
----------------------------------------------------------------
20
----------------------------------------------------------------
19
tcg: Reset free_temps before tcg_optimize
21
Clean up code_gen_buffer allocation.
20
tcg/riscv: Fix StoreStore barrier generation
22
Add tcg_remove_ops_after.
21
include/exec: Introduce fpst alias in helper-head.h.inc
23
Fix tcg_constant_* documentation.
22
target/sparc: Use memcpy() and remove memcpy32()
24
Improve TB chaining documentation.
25
Fix float32_exp2.
26
Fix arm tcg_out_op function signature.
23
27
24
----------------------------------------------------------------
28
----------------------------------------------------------------
25
Philippe Mathieu-Daudé (1):
29
Jose R. Ziviani (1):
26
target/sparc: Use memcpy() and remove memcpy32()
30
tcg/arm: Fix tcg_out_op function signature
27
31
28
Richard Henderson (2):
32
Luis Pires (1):
29
tcg: Reset free_temps before tcg_optimize
33
docs/devel: Explain in more detail the TB chaining mechanisms
30
include/exec: Introduce fpst alias in helper-head.h.inc
31
34
32
Roman Artemev (1):
35
Richard Henderson (32):
33
tcg/riscv: Fix StoreStore barrier generation
36
meson: Split out tcg/meson.build
37
meson: Split out fpu/meson.build
38
tcg: Re-order tcg_region_init vs tcg_prologue_init
39
tcg: Remove error return from tcg_region_initial_alloc__locked
40
tcg: Split out tcg_region_initial_alloc
41
tcg: Split out tcg_region_prologue_set
42
tcg: Split out region.c
43
accel/tcg: Inline cpu_gen_init
44
accel/tcg: Move alloc_code_gen_buffer to tcg/region.c
45
accel/tcg: Rename tcg_init to tcg_init_machine
46
tcg: Create tcg_init
47
accel/tcg: Merge tcg_exec_init into tcg_init_machine
48
accel/tcg: Use MiB in tcg_init_machine
49
accel/tcg: Pass down max_cpus to tcg_init
50
tcg: Introduce tcg_max_ctxs
51
tcg: Move MAX_CODE_GEN_BUFFER_SIZE to tcg-target.h
52
tcg: Replace region.end with region.total_size
53
tcg: Rename region.start to region.after_prologue
54
tcg: Tidy tcg_n_regions
55
tcg: Tidy split_cross_256mb
56
tcg: Move in_code_gen_buffer and tests to region.c
57
tcg: Allocate code_gen_buffer into struct tcg_region_state
58
tcg: Return the map protection from alloc_code_gen_buffer
59
tcg: Sink qemu_madvise call to common code
60
util/osdep: Add qemu_mprotect_rw
61
tcg: Round the tb_size default from qemu_get_host_physmem
62
tcg: Merge buffer protection and guard page protection
63
tcg: When allocating for !splitwx, begin with PROT_NONE
64
tcg: Move tcg_init_ctx and tcg_ctx from accel/tcg/
65
tcg: Introduce tcg_remove_ops_after
66
tcg: Fix documentation for tcg_constant_* vs tcg_temp_free_*
67
softfloat: Fix tp init in float32_exp2
34
68
35
include/tcg/tcg-temp-internal.h | 6 ++++++
69
docs/devel/tcg.rst | 101 ++++-
36
accel/tcg/plugin-gen.c | 2 +-
70
meson.build | 12 +-
37
target/sparc/win_helper.c | 26 ++++++++------------------
71
accel/tcg/internal.h | 2 +
38
tcg/tcg.c | 5 ++++-
72
include/qemu/osdep.h | 1 +
39
include/exec/helper-head.h.inc | 3 +++
73
include/sysemu/tcg.h | 2 -
40
tcg/riscv/tcg-target.c.inc | 2 +-
74
include/tcg/tcg.h | 28 +-
41
6 files changed, 23 insertions(+), 21 deletions(-)
75
tcg/aarch64/tcg-target.h | 1 +
76
tcg/arm/tcg-target.h | 1 +
77
tcg/i386/tcg-target.h | 2 +
78
tcg/mips/tcg-target.h | 6 +
79
tcg/ppc/tcg-target.h | 2 +
80
tcg/riscv/tcg-target.h | 1 +
81
tcg/s390/tcg-target.h | 3 +
82
tcg/sparc/tcg-target.h | 1 +
83
tcg/tcg-internal.h | 40 ++
84
tcg/tci/tcg-target.h | 1 +
85
accel/tcg/tcg-all.c | 32 +-
86
accel/tcg/translate-all.c | 439 +-------------------
87
bsd-user/main.c | 3 +-
88
fpu/softfloat.c | 2 +-
89
linux-user/main.c | 1 -
90
tcg/region.c | 999 ++++++++++++++++++++++++++++++++++++++++++++++
91
tcg/tcg.c | 649 +++---------------------------
92
util/osdep.c | 9 +
93
tcg/arm/tcg-target.c.inc | 3 +-
94
fpu/meson.build | 1 +
95
tcg/meson.build | 14 +
96
27 files changed, 1266 insertions(+), 1090 deletions(-)
97
create mode 100644 tcg/tcg-internal.h
98
create mode 100644 tcg/region.c
99
create mode 100644 fpu/meson.build
100
create mode 100644 tcg/meson.build
42
101
diff view generated by jsdifflib
Deleted patch
1
When allocating new temps during tcg_optimize, do not re-use
2
any EBB temps that were used within the TB. We do not have
3
any idea over what span of the TB the temp was live.
4
1
5
Introduce tcg_temp_ebb_reset_freed and use before tcg_optimize,
6
as well as replacing the equivalent in plugin_gen_inject and
7
tcg_func_start.
8
9
Cc: qemu-stable@nongnu.org
10
Fixes: fb04ab7ddd8 ("tcg/optimize: Lower TCG_COND_TST{EQ,NE} if unsupported")
11
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2711
12
Reported-by: wannacu <wannacu2049@gmail.com>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
15
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
16
---
17
include/tcg/tcg-temp-internal.h | 6 ++++++
18
accel/tcg/plugin-gen.c | 2 +-
19
tcg/tcg.c | 5 ++++-
20
3 files changed, 11 insertions(+), 2 deletions(-)
21
22
diff --git a/include/tcg/tcg-temp-internal.h b/include/tcg/tcg-temp-internal.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/include/tcg/tcg-temp-internal.h
25
+++ b/include/tcg/tcg-temp-internal.h
26
@@ -XXX,XX +XXX,XX @@ TCGv_i64 tcg_temp_ebb_new_i64(void);
27
TCGv_ptr tcg_temp_ebb_new_ptr(void);
28
TCGv_i128 tcg_temp_ebb_new_i128(void);
29
30
+/* Forget all freed EBB temps, so that new allocations produce new temps. */
31
+static inline void tcg_temp_ebb_reset_freed(TCGContext *s)
32
+{
33
+ memset(s->free_temps, 0, sizeof(s->free_temps));
34
+}
35
+
36
#endif /* TCG_TEMP_FREE_H */
37
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/accel/tcg/plugin-gen.c
40
+++ b/accel/tcg/plugin-gen.c
41
@@ -XXX,XX +XXX,XX @@ static void plugin_gen_inject(struct qemu_plugin_tb *plugin_tb)
42
* that might be live within the existing opcode stream.
43
* The simplest solution is to release them all and create new.
44
*/
45
- memset(tcg_ctx->free_temps, 0, sizeof(tcg_ctx->free_temps));
46
+ tcg_temp_ebb_reset_freed(tcg_ctx);
47
48
QTAILQ_FOREACH_SAFE(op, &tcg_ctx->ops, link, next) {
49
switch (op->opc) {
50
diff --git a/tcg/tcg.c b/tcg/tcg.c
51
index XXXXXXX..XXXXXXX 100644
52
--- a/tcg/tcg.c
53
+++ b/tcg/tcg.c
54
@@ -XXX,XX +XXX,XX @@ void tcg_func_start(TCGContext *s)
55
s->nb_temps = s->nb_globals;
56
57
/* No temps have been previously allocated for size or locality. */
58
- memset(s->free_temps, 0, sizeof(s->free_temps));
59
+ tcg_temp_ebb_reset_freed(s);
60
61
/* No constant temps have been previously allocated. */
62
for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
63
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
64
}
65
#endif
66
67
+ /* Do not reuse any EBB that may be allocated within the TB. */
68
+ tcg_temp_ebb_reset_freed(s);
69
+
70
tcg_optimize(s);
71
72
reachable_code_pass(s);
73
--
74
2.43.0
75
76
diff view generated by jsdifflib
Deleted patch
1
From: Roman Artemev <roman.artemev@syntacore.com>
2
1
3
On RISC-V, a StoreStore barrier corresponds to
4
`fence w, w`, not `fence r, r`.
5
6
Cc: qemu-stable@nongnu.org
7
Fixes: efbea94c76b ("tcg/riscv: Add slowpath load and store instructions")
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Denis Tomashev <denis.tomashev@syntacore.com>
10
Signed-off-by: Roman Artemev <roman.artemev@syntacore.com>
11
Message-ID: <e2f2131e294a49e79959d4fa9ec02cf4@syntacore.com>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
14
tcg/riscv/tcg-target.c.inc | 2 +-
15
1 file changed, 1 insertion(+), 1 deletion(-)
16
17
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
18
index XXXXXXX..XXXXXXX 100644
19
--- a/tcg/riscv/tcg-target.c.inc
20
+++ b/tcg/riscv/tcg-target.c.inc
21
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
22
insn |= 0x02100000;
23
}
24
if (a0 & TCG_MO_ST_ST) {
25
- insn |= 0x02200000;
26
+ insn |= 0x01100000;
27
}
28
tcg_out32(s, insn);
29
}
30
--
31
2.43.0
diff view generated by jsdifflib
Deleted patch
1
This allows targets to declare that the helper requires a
2
float_status pointer instead of a generic void pointer.
3
1
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
include/exec/helper-head.h.inc | 3 +++
8
1 file changed, 3 insertions(+)
9
10
diff --git a/include/exec/helper-head.h.inc b/include/exec/helper-head.h.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/include/exec/helper-head.h.inc
13
+++ b/include/exec/helper-head.h.inc
14
@@ -XXX,XX +XXX,XX @@
15
#define dh_alias_ptr ptr
16
#define dh_alias_cptr ptr
17
#define dh_alias_env ptr
18
+#define dh_alias_fpst ptr
19
#define dh_alias_void void
20
#define dh_alias_noreturn noreturn
21
#define dh_alias(t) glue(dh_alias_, t)
22
@@ -XXX,XX +XXX,XX @@
23
#define dh_ctype_ptr void *
24
#define dh_ctype_cptr const void *
25
#define dh_ctype_env CPUArchState *
26
+#define dh_ctype_fpst float_status *
27
#define dh_ctype_void void
28
#define dh_ctype_noreturn G_NORETURN void
29
#define dh_ctype(t) dh_ctype_##t
30
@@ -XXX,XX +XXX,XX @@
31
#define dh_typecode_f64 dh_typecode_i64
32
#define dh_typecode_cptr dh_typecode_ptr
33
#define dh_typecode_env dh_typecode_ptr
34
+#define dh_typecode_fpst dh_typecode_ptr
35
#define dh_typecode(t) dh_typecode_##t
36
37
#define dh_callflag_i32 0
38
--
39
2.43.0
40
41
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@linaro.org>
1
Do not mess around with setting values within tcg_init_ctx.
2
Put the values into 'region' directly, which is where they
3
will live for the lifetime of the program.
2
4
3
Rather than manually copying each register, use
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
the libc memcpy(), which is well optimized nowadays.
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
6
Suggested-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
7
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Message-ID: <20241205205418.67613-1-philmd@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
8
---
13
target/sparc/win_helper.c | 26 ++++++++------------------
9
tcg/region.c | 64 ++++++++++++++++++++++------------------------------
14
1 file changed, 8 insertions(+), 18 deletions(-)
10
1 file changed, 27 insertions(+), 37 deletions(-)
15
11
16
diff --git a/target/sparc/win_helper.c b/target/sparc/win_helper.c
12
diff --git a/tcg/region.c b/tcg/region.c
17
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
18
--- a/target/sparc/win_helper.c
14
--- a/tcg/region.c
19
+++ b/target/sparc/win_helper.c
15
+++ b/tcg/region.c
20
@@ -XXX,XX +XXX,XX @@
16
@@ -XXX,XX +XXX,XX @@ static size_t tree_size;
21
#include "exec/helper-proto.h"
17
22
#include "trace.h"
18
bool in_code_gen_buffer(const void *p)
23
19
{
24
-static inline void memcpy32(target_ulong *dst, const target_ulong *src)
20
- const TCGContext *s = &tcg_init_ctx;
25
-{
21
/*
26
- dst[0] = src[0];
22
* Much like it is valid to have a pointer to the byte past the
27
- dst[1] = src[1];
23
* end of an array (so long as you don't dereference it), allow
28
- dst[2] = src[2];
24
* a pointer to the byte past the end of the code gen buffer.
29
- dst[3] = src[3];
25
*/
30
- dst[4] = src[4];
26
- return (size_t)(p - s->code_gen_buffer) <= s->code_gen_buffer_size;
31
- dst[5] = src[5];
27
+ return (size_t)(p - region.start_aligned) <= region.total_size;
32
- dst[6] = src[6];
28
}
33
- dst[7] = src[7];
29
34
-}
30
#ifdef CONFIG_DEBUG_TCG
31
@@ -XXX,XX +XXX,XX @@ static bool alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
32
}
33
qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
34
35
- tcg_ctx->code_gen_buffer = buf;
36
- tcg_ctx->code_gen_buffer_size = size;
37
+ region.start_aligned = buf;
38
+ region.total_size = size;
39
return true;
40
}
41
#elif defined(_WIN32)
42
@@ -XXX,XX +XXX,XX @@ static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
43
return false;
44
}
45
46
- tcg_ctx->code_gen_buffer = buf;
47
- tcg_ctx->code_gen_buffer_size = size;
48
+ region.start_aligned = buf;
49
+ region.total_size = size;
50
return true;
51
}
52
#else
53
@@ -XXX,XX +XXX,XX @@ static bool alloc_code_gen_buffer_anon(size_t size, int prot,
54
/* Request large pages for the buffer. */
55
qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
56
57
- tcg_ctx->code_gen_buffer = buf;
58
- tcg_ctx->code_gen_buffer_size = size;
59
+ region.start_aligned = buf;
60
+ region.total_size = size;
61
return true;
62
}
63
64
@@ -XXX,XX +XXX,XX @@ static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
65
return false;
66
}
67
/* The size of the mapping may have been adjusted. */
68
- size = tcg_ctx->code_gen_buffer_size;
69
- buf_rx = tcg_ctx->code_gen_buffer;
70
+ buf_rx = region.start_aligned;
71
+ size = region.total_size;
72
#endif
73
74
buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
75
@@ -XXX,XX +XXX,XX @@ static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
76
#endif
77
78
close(fd);
79
- tcg_ctx->code_gen_buffer = buf_rw;
80
- tcg_ctx->code_gen_buffer_size = size;
81
+ region.start_aligned = buf_rw;
82
+ region.total_size = size;
83
tcg_splitwx_diff = buf_rx - buf_rw;
84
85
/* Request large pages for the buffer and the splitwx. */
86
@@ -XXX,XX +XXX,XX @@ static bool alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
87
return false;
88
}
89
90
- buf_rw = (mach_vm_address_t)tcg_ctx->code_gen_buffer;
91
+ buf_rw = (mach_vm_address_t)region.start_aligned;
92
buf_rx = 0;
93
ret = mach_vm_remap(mach_task_self(),
94
&buf_rx,
95
@@ -XXX,XX +XXX,XX @@ static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
96
*/
97
void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
98
{
99
- void *buf, *aligned, *end;
100
- size_t total_size;
101
size_t page_size;
102
size_t region_size;
103
- size_t n_regions;
104
size_t i;
105
bool ok;
106
107
@@ -XXX,XX +XXX,XX @@ void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
108
splitwx, &error_fatal);
109
assert(ok);
110
111
- buf = tcg_init_ctx.code_gen_buffer;
112
- total_size = tcg_init_ctx.code_gen_buffer_size;
113
- page_size = qemu_real_host_page_size;
114
- n_regions = tcg_n_regions(total_size, max_cpus);
35
-
115
-
36
void cpu_set_cwp(CPUSPARCState *env, int new_cwp)
116
- /* The first region will be 'aligned - buf' bytes larger than the others */
37
{
117
- aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
38
/* put the modified wrap registers at their proper location */
118
- g_assert(aligned < tcg_init_ctx.code_gen_buffer + total_size);
39
if (env->cwp == env->nwindows - 1) {
119
-
40
- memcpy32(env->regbase, env->regbase + env->nwindows * 16);
120
/*
41
+ memcpy(env->regbase, env->regbase + env->nwindows * 16,
121
* Make region_size a multiple of page_size, using aligned as the start.
42
+ sizeof(env->gregs));
122
* As a result of this we might end up with a few extra pages at the end of
43
}
123
* the buffer; we will assign those to the last region.
44
env->cwp = new_cwp;
124
*/
45
125
- region_size = (total_size - (aligned - buf)) / n_regions;
46
/* put the wrap registers at their temporary location */
126
+ region.n = tcg_n_regions(region.total_size, max_cpus);
47
if (new_cwp == env->nwindows - 1) {
127
+ page_size = qemu_real_host_page_size;
48
- memcpy32(env->regbase + env->nwindows * 16, env->regbase);
128
+ region_size = region.total_size / region.n;
49
+ memcpy(env->regbase + env->nwindows * 16, env->regbase,
129
region_size = QEMU_ALIGN_DOWN(region_size, page_size);
50
+ sizeof(env->gregs));
130
51
}
131
/* A region must have at least 2 pages; one code, one guard */
52
env->regwptr = env->regbase + (new_cwp * 16);
132
g_assert(region_size >= 2 * page_size);
53
}
133
+ region.stride = region_size;
54
@@ -XXX,XX +XXX,XX @@ void cpu_gl_switch_gregs(CPUSPARCState *env, uint32_t new_gl)
134
+
55
dst = get_gl_gregset(env, env->gl);
135
+ /* Reserve space for guard pages. */
56
136
+ region.size = region_size - page_size;
57
if (src != dst) {
137
+ region.total_size -= page_size;
58
- memcpy32(dst, env->gregs);
138
+
59
- memcpy32(env->gregs, src);
139
+ /*
60
+ memcpy(dst, env->gregs, sizeof(env->gregs));
140
+ * The first region will be smaller than the others, via the prologue,
61
+ memcpy(env->gregs, src, sizeof(env->gregs));
141
+ * which has yet to be allocated. For now, the first region begins at
62
}
142
+ * the page boundary.
63
}
143
+ */
64
144
+ region.after_prologue = region.start_aligned;
65
@@ -XXX,XX +XXX,XX @@ void cpu_change_pstate(CPUSPARCState *env, uint32_t new_pstate)
145
66
/* Switch global register bank */
146
/* init the region struct */
67
src = get_gregset(env, new_pstate_regs);
147
qemu_mutex_init(&region.lock);
68
dst = get_gregset(env, pstate_regs);
148
- region.n = n_regions;
69
- memcpy32(dst, env->gregs);
149
- region.size = region_size - page_size;
70
- memcpy32(env->gregs, src);
150
- region.stride = region_size;
71
+ memcpy(dst, env->gregs, sizeof(env->gregs));
151
- region.after_prologue = buf;
72
+ memcpy(env->gregs, src, sizeof(env->gregs));
152
- region.start_aligned = aligned;
73
} else {
153
- /* page-align the end, since its last page will be a guard page */
74
trace_win_helper_no_switch_pstate(new_pstate_regs);
154
- end = QEMU_ALIGN_PTR_DOWN(buf + total_size, page_size);
75
}
155
- /* account for that last guard page */
156
- end -= page_size;
157
- total_size = end - aligned;
158
- region.total_size = total_size;
159
160
/*
161
* Set guard pages in the rw buffer, as that's the one into which
76
--
162
--
77
2.43.0
163
2.25.1
78
164
79
165
diff view generated by jsdifflib