1 | The following changes since commit e0175b71638cf4398903c0d25f93fe62e0606389: | 1 | v2: Fix target/loongarch printf formats for vaddr |
---|---|---|---|
2 | Include two more reviewed patches. | ||
2 | 3 | ||
3 | Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200228' into staging (2020-02-28 16:39:27 +0000) | 4 | This time with actual pull urls. :-/ |
5 | |||
6 | r~ | ||
7 | |||
8 | |||
9 | The following changes since commit db7aa99ef894e88fc5eedf02ca2579b8c344b2ec: | ||
10 | |||
11 | Merge tag 'hw-misc-20250216' of https://github.com/philmd/qemu into staging (2025-02-16 20:48:06 -0500) | ||
4 | 12 | ||
5 | are available in the Git repository at: | 13 | are available in the Git repository at: |
6 | 14 | ||
7 | https://github.com/rth7680/qemu.git tags/pull-tcg-20200228 | 15 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20250215-2 |
8 | 16 | ||
9 | for you to fetch changes up to 600e17b261555c56a048781b8dd5ba3985650013: | 17 | for you to fetch changes up to a39bdd0f4ba96fcbb6b5bcb6e89591d2b24f52eb: |
10 | 18 | ||
11 | accel/tcg: increase default code gen buffer size for 64 bit (2020-02-28 17:43:31 -0800) | 19 | tcg: Remove TCG_TARGET_HAS_{br,set}cond2 from riscv and loongarch64 (2025-02-17 09:52:07 -0800) |
12 | 20 | ||
13 | ---------------------------------------------------------------- | 21 | ---------------------------------------------------------------- |
14 | Fix race in cpu_exec_step_atomic. | 22 | tcg: Remove last traces of TCG_TARGET_NEED_POOL_LABELS |
15 | Work around compile failure with -fno-inline. | 23 | tcg: Cleanups after disallowing 64-on-32 |
16 | Expand tcg/arm epilogue inline. | 24 | tcg: Introduce constraint for zero register |
17 | Adjustments to the default code gen buffer size. | 25 | tcg: Remove TCG_TARGET_HAS_{br,set}cond2 from riscv and loongarch64 |
26 | tcg/i386: Use tcg_{high,unsigned}_cond in tcg_out_brcond2 | ||
27 | linux-user: Move TARGET_SA_RESTORER out of generic/signal.h | ||
28 | linux-user: Fix alignment when unmapping excess reservation | ||
29 | target/sparc: Fix register selection for all F*TOx and FxTO* instructions | ||
30 | target/sparc: Fix gdbstub incorrectly handling registers f32-f62 | ||
31 | target/sparc: fake UltraSPARC T1 PCR and PIC registers | ||
18 | 32 | ||
19 | ---------------------------------------------------------------- | 33 | ---------------------------------------------------------------- |
20 | Alex Bennée (5): | 34 | Andreas Schwab (1): |
21 | accel/tcg: fix race in cpu_exec_step_atomic (bug 1863025) | 35 | linux-user: Move TARGET_SA_RESTORER out of generic/signal.h |
22 | accel/tcg: use units.h for defining code gen buffer sizes | ||
23 | accel/tcg: remove link between guest ram and TCG cache size | ||
24 | accel/tcg: only USE_STATIC_CODE_GEN_BUFFER on 32 bit hosts | ||
25 | accel/tcg: increase default code gen buffer size for 64 bit | ||
26 | 36 | ||
27 | Richard Henderson (2): | 37 | Artyom Tarasenko (1): |
28 | tcg/arm: Split out tcg_out_epilogue | 38 | target/sparc: fake UltraSPARC T1 PCR and PIC registers |
29 | tcg/arm: Expand epilogue inline | ||
30 | 39 | ||
31 | Zenghui Yu (1): | 40 | Fabiano Rosas (1): |
32 | compiler.h: Don't use compile-time assert when __NO_INLINE__ is defined | 41 | elfload: Fix alignment when unmapping excess reservation |
33 | 42 | ||
34 | include/qemu/compiler.h | 2 +- | 43 | Mikael Szreder (2): |
35 | accel/tcg/cpu-exec.c | 21 ++++++++-------- | 44 | target/sparc: Fix register selection for all F*TOx and FxTO* instructions |
36 | accel/tcg/translate-all.c | 61 ++++++++++++++++++++++++++++------------------- | 45 | target/sparc: Fix gdbstub incorrectly handling registers f32-f62 |
37 | tcg/arm/tcg-target.inc.c | 29 ++++++++++------------ | ||
38 | 4 files changed, 60 insertions(+), 53 deletions(-) | ||
39 | 46 | ||
47 | Richard Henderson (22): | ||
48 | tcg: Remove last traces of TCG_TARGET_NEED_POOL_LABELS | ||
49 | tcg: Remove TCG_OVERSIZED_GUEST | ||
50 | tcg: Drop support for two address registers in gen_ldst | ||
51 | tcg: Merge INDEX_op_qemu_*_{a32,a64}_* | ||
52 | tcg/arm: Drop addrhi from prepare_host_addr | ||
53 | tcg/i386: Drop addrhi from prepare_host_addr | ||
54 | tcg/mips: Drop addrhi from prepare_host_addr | ||
55 | tcg/ppc: Drop addrhi from prepare_host_addr | ||
56 | tcg: Replace addr{lo,hi}_reg with addr_reg in TCGLabelQemuLdst | ||
57 | plugins: Fix qemu_plugin_read_memory_vaddr parameters | ||
58 | accel/tcg: Fix tlb_set_page_with_attrs, tlb_set_page | ||
59 | target/loongarch: Use VADDR_PRIx for logging pc_next | ||
60 | include/exec: Change vaddr to uintptr_t | ||
61 | include/exec: Use uintptr_t in CPUTLBEntry | ||
62 | tcg: Introduce the 'z' constraint for a hardware zero register | ||
63 | tcg/aarch64: Use 'z' constraint | ||
64 | tcg/loongarch64: Use 'z' constraint | ||
65 | tcg/mips: Use 'z' constraint | ||
66 | tcg/riscv: Use 'z' constraint | ||
67 | tcg/sparc64: Use 'z' constraint | ||
68 | tcg/i386: Use tcg_{high,unsigned}_cond in tcg_out_brcond2 | ||
69 | tcg: Remove TCG_TARGET_HAS_{br,set}cond2 from riscv and loongarch64 | ||
70 | |||
71 | include/exec/tlb-common.h | 10 +- | ||
72 | include/exec/vaddr.h | 16 +- | ||
73 | include/qemu/atomic.h | 18 +- | ||
74 | include/tcg/oversized-guest.h | 23 --- | ||
75 | include/tcg/tcg-opc.h | 28 +-- | ||
76 | include/tcg/tcg.h | 3 +- | ||
77 | linux-user/aarch64/target_signal.h | 2 + | ||
78 | linux-user/arm/target_signal.h | 2 + | ||
79 | linux-user/generic/signal.h | 1 - | ||
80 | linux-user/i386/target_signal.h | 2 + | ||
81 | linux-user/m68k/target_signal.h | 1 + | ||
82 | linux-user/microblaze/target_signal.h | 2 + | ||
83 | linux-user/ppc/target_signal.h | 2 + | ||
84 | linux-user/s390x/target_signal.h | 2 + | ||
85 | linux-user/sh4/target_signal.h | 2 + | ||
86 | linux-user/x86_64/target_signal.h | 2 + | ||
87 | linux-user/xtensa/target_signal.h | 2 + | ||
88 | tcg/aarch64/tcg-target-con-set.h | 12 +- | ||
89 | tcg/aarch64/tcg-target.h | 2 + | ||
90 | tcg/loongarch64/tcg-target-con-set.h | 15 +- | ||
91 | tcg/loongarch64/tcg-target-con-str.h | 1 - | ||
92 | tcg/loongarch64/tcg-target-has.h | 2 - | ||
93 | tcg/loongarch64/tcg-target.h | 2 + | ||
94 | tcg/mips/tcg-target-con-set.h | 26 +-- | ||
95 | tcg/mips/tcg-target-con-str.h | 1 - | ||
96 | tcg/mips/tcg-target.h | 2 + | ||
97 | tcg/riscv/tcg-target-con-set.h | 10 +- | ||
98 | tcg/riscv/tcg-target-con-str.h | 1 - | ||
99 | tcg/riscv/tcg-target-has.h | 2 - | ||
100 | tcg/riscv/tcg-target.h | 2 + | ||
101 | tcg/sparc64/tcg-target-con-set.h | 12 +- | ||
102 | tcg/sparc64/tcg-target-con-str.h | 1 - | ||
103 | tcg/sparc64/tcg-target.h | 3 +- | ||
104 | tcg/tci/tcg-target.h | 1 - | ||
105 | accel/tcg/cputlb.c | 32 +--- | ||
106 | accel/tcg/tcg-all.c | 9 +- | ||
107 | linux-user/elfload.c | 4 +- | ||
108 | plugins/api.c | 2 +- | ||
109 | target/arm/ptw.c | 34 ---- | ||
110 | target/loongarch/tcg/translate.c | 2 +- | ||
111 | target/riscv/cpu_helper.c | 13 +- | ||
112 | target/sparc/gdbstub.c | 18 +- | ||
113 | target/sparc/translate.c | 19 +++ | ||
114 | tcg/optimize.c | 21 +-- | ||
115 | tcg/tcg-op-ldst.c | 103 +++-------- | ||
116 | tcg/tcg.c | 97 +++++------ | ||
117 | tcg/tci.c | 119 +++---------- | ||
118 | docs/devel/multi-thread-tcg.rst | 1 - | ||
119 | docs/devel/tcg-ops.rst | 4 +- | ||
120 | target/loongarch/tcg/insn_trans/trans_atomic.c.inc | 2 +- | ||
121 | target/sparc/insns.decode | 19 ++- | ||
122 | tcg/aarch64/tcg-target.c.inc | 86 ++++------ | ||
123 | tcg/arm/tcg-target.c.inc | 114 ++++--------- | ||
124 | tcg/i386/tcg-target.c.inc | 190 +++++---------------- | ||
125 | tcg/loongarch64/tcg-target.c.inc | 72 +++----- | ||
126 | tcg/mips/tcg-target.c.inc | 169 ++++++------------ | ||
127 | tcg/ppc/tcg-target.c.inc | 164 +++++------------- | ||
128 | tcg/riscv/tcg-target.c.inc | 56 +++--- | ||
129 | tcg/s390x/tcg-target.c.inc | 40 ++--- | ||
130 | tcg/sparc64/tcg-target.c.inc | 45 ++--- | ||
131 | tcg/tci/tcg-target.c.inc | 60 ++----- | ||
132 | 61 files changed, 548 insertions(+), 1160 deletions(-) | ||
133 | delete mode 100644 include/tcg/oversized-guest.h | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Alex Bennée <alex.bennee@linaro.org> | ||
2 | 1 | ||
3 | The bug describes a race whereby cpu_exec_step_atomic can acquire a TB | ||
4 | which is invalidated by a tb_flush before we execute it. This doesn't | ||
5 | affect the other cpu_exec modes as a tb_flush by its nature can only | ||
6 | occur on a quiescent system. The race was described as: | ||
7 | |||
8 | B2. tcg_cpu_exec => cpu_exec => tb_find => tb_gen_code | ||
9 | B3. tcg_tb_alloc obtains a new TB | ||
10 | |||
11 | C3. TB obtained with tb_lookup__cpu_state or tb_gen_code | ||
12 | (same TB as B2) | ||
13 | |||
14 | A3. start_exclusive critical section entered | ||
15 | A4. do_tb_flush is called, TB memory freed/re-allocated | ||
16 | A5. end_exclusive exits critical section | ||
17 | |||
18 | B2. tcg_cpu_exec => cpu_exec => tb_find => tb_gen_code | ||
19 | B3. tcg_tb_alloc reallocates TB from B2 | ||
20 | |||
21 | C4. start_exclusive critical section entered | ||
22 | C5. cpu_tb_exec executes the TB code that was freed in A4 | ||
23 | |||
24 | The simplest fix is to widen the exclusive period to include the TB | ||
25 | lookup. As a result we can drop the complication of checking we are in | ||
26 | the exclusive region before we end it. | ||
27 | |||
28 | Cc: Yifan <me@yifanlu.com> | ||
29 | Buglink: https://bugs.launchpad.net/qemu/+bug/1863025 | ||
30 | Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> | ||
31 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
32 | Signed-off-by: Alex Bennée <alex.bennee@linaro.org> | ||
33 | Message-Id: <20200214144952.15502-1-alex.bennee@linaro.org> | ||
34 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
35 | --- | ||
36 | accel/tcg/cpu-exec.c | 21 +++++++++++---------- | ||
37 | 1 file changed, 11 insertions(+), 10 deletions(-) | ||
38 | |||
39 | diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/accel/tcg/cpu-exec.c | ||
42 | +++ b/accel/tcg/cpu-exec.c | ||
43 | @@ -XXX,XX +XXX,XX @@ void cpu_exec_step_atomic(CPUState *cpu) | ||
44 | uint32_t cf_mask = cflags & CF_HASH_MASK; | ||
45 | |||
46 | if (sigsetjmp(cpu->jmp_env, 0) == 0) { | ||
47 | + start_exclusive(); | ||
48 | + | ||
49 | tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask); | ||
50 | if (tb == NULL) { | ||
51 | mmap_lock(); | ||
52 | @@ -XXX,XX +XXX,XX @@ void cpu_exec_step_atomic(CPUState *cpu) | ||
53 | mmap_unlock(); | ||
54 | } | ||
55 | |||
56 | - start_exclusive(); | ||
57 | - | ||
58 | /* Since we got here, we know that parallel_cpus must be true. */ | ||
59 | parallel_cpus = false; | ||
60 | cc->cpu_exec_enter(cpu); | ||
61 | @@ -XXX,XX +XXX,XX @@ void cpu_exec_step_atomic(CPUState *cpu) | ||
62 | qemu_plugin_disable_mem_helpers(cpu); | ||
63 | } | ||
64 | |||
65 | - if (cpu_in_exclusive_context(cpu)) { | ||
66 | - /* We might longjump out of either the codegen or the | ||
67 | - * execution, so must make sure we only end the exclusive | ||
68 | - * region if we started it. | ||
69 | - */ | ||
70 | - parallel_cpus = true; | ||
71 | - end_exclusive(); | ||
72 | - } | ||
73 | + | ||
74 | + /* | ||
75 | + * As we start the exclusive region before codegen we must still | ||
76 | + * be in the region if we longjump out of either the codegen or | ||
77 | + * the execution. | ||
78 | + */ | ||
79 | + g_assert(cpu_in_exclusive_context(cpu)); | ||
80 | + parallel_cpus = true; | ||
81 | + end_exclusive(); | ||
82 | } | ||
83 | |||
84 | struct tb_desc { | ||
85 | -- | ||
86 | 2.20.1 | ||
87 | |||
88 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Zenghui Yu <yuzenghui@huawei.com> | ||
2 | 1 | ||
3 | Our robot reported the following compile-time warning while compiling | ||
4 | Qemu with -fno-inline cflags: | ||
5 | |||
6 | In function 'load_memop', | ||
7 | inlined from 'load_helper' at /qemu/accel/tcg/cputlb.c:1578:20, | ||
8 | inlined from 'full_ldub_mmu' at /qemu/accel/tcg/cputlb.c:1624:12: | ||
9 | /qemu/accel/tcg/cputlb.c:1502:9: error: call to 'qemu_build_not_reached' declared with attribute error: code path is reachable | ||
10 | qemu_build_not_reached(); | ||
11 | ^~~~~~~~~~~~~~~~~~~~~~~~ | ||
12 | [...] | ||
13 | |||
14 | It looks like a false-positive because only (MO_UB ^ MO_BSWAP) will | ||
15 | hit the default case in load_memop() while need_swap (size > 1) has | ||
16 | already ensured that MO_UB is not involved. | ||
17 | |||
18 | So the thing is that compilers get confused by the -fno-inline and | ||
19 | just can't accurately evaluate memop_size(op) at compile time, and | ||
20 | then the qemu_build_not_reached() is wrongly triggered by (MO_UB ^ | ||
21 | MO_BSWAP). Let's carefully avoid using the compile-time assert when | ||
22 | no functions will be inlined into their callers. | ||
23 | |||
24 | Reported-by: Euler Robot <euler.robot@huawei.com> | ||
25 | Suggested-by: Richard Henderson <richard.henderson@linaro.org> | ||
26 | Signed-off-by: Zenghui Yu <yuzenghui@huawei.com> | ||
27 | Message-Id: <20200205141545.180-1-yuzenghui@huawei.com> | ||
28 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
29 | --- | ||
30 | include/qemu/compiler.h | 2 +- | ||
31 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
32 | |||
33 | diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/include/qemu/compiler.h | ||
36 | +++ b/include/qemu/compiler.h | ||
37 | @@ -XXX,XX +XXX,XX @@ | ||
38 | * supports QEMU_ERROR, this will be reported at compile time; otherwise | ||
39 | * this will be reported at link time due to the missing symbol. | ||
40 | */ | ||
41 | -#ifdef __OPTIMIZE__ | ||
42 | +#if defined(__OPTIMIZE__) && !defined(__NO_INLINE__) | ||
43 | extern void QEMU_NORETURN QEMU_ERROR("code path is reachable") | ||
44 | qemu_build_not_reached(void); | ||
45 | #else | ||
46 | -- | ||
47 | 2.20.1 | ||
48 | |||
49 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <rth@twiddle.net> | ||
2 | 1 | ||
3 | We will shortly use this function from tcg_out_op as well. | ||
4 | |||
5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
6 | Signed-off-by: Richard Henderson <rth@twiddle.net> | ||
7 | --- | ||
8 | tcg/arm/tcg-target.inc.c | 19 +++++++++++-------- | ||
9 | 1 file changed, 11 insertions(+), 8 deletions(-) | ||
10 | |||
11 | diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/tcg/arm/tcg-target.inc.c | ||
14 | +++ b/tcg/arm/tcg-target.inc.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) | ||
16 | } | ||
17 | |||
18 | static tcg_insn_unit *tb_ret_addr; | ||
19 | +static void tcg_out_epilogue(TCGContext *s); | ||
20 | |||
21 | static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
22 | const TCGArg *args, const int *const_args) | ||
23 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count) | ||
24 | + TCG_TARGET_STACK_ALIGN - 1) \ | ||
25 | & -TCG_TARGET_STACK_ALIGN) | ||
26 | |||
27 | +#define STACK_ADDEND (FRAME_SIZE - PUSH_SIZE) | ||
28 | + | ||
29 | static void tcg_target_qemu_prologue(TCGContext *s) | ||
30 | { | ||
31 | - int stack_addend; | ||
32 | - | ||
33 | /* Calling convention requires us to save r4-r11 and lr. */ | ||
34 | /* stmdb sp!, { r4 - r11, lr } */ | ||
35 | tcg_out32(s, (COND_AL << 28) | 0x092d4ff0); | ||
36 | |||
37 | /* Reserve callee argument and tcg temp space. */ | ||
38 | - stack_addend = FRAME_SIZE - PUSH_SIZE; | ||
39 | - | ||
40 | tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK, | ||
41 | - TCG_REG_CALL_STACK, stack_addend, 1); | ||
42 | + TCG_REG_CALL_STACK, STACK_ADDEND, 1); | ||
43 | tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, | ||
44 | CPU_TEMP_BUF_NLONGS * sizeof(long)); | ||
45 | |||
46 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s) | ||
47 | */ | ||
48 | s->code_gen_epilogue = s->code_ptr; | ||
49 | tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0); | ||
50 | - | ||
51 | - /* TB epilogue */ | ||
52 | tb_ret_addr = s->code_ptr; | ||
53 | + tcg_out_epilogue(s); | ||
54 | +} | ||
55 | + | ||
56 | +static void tcg_out_epilogue(TCGContext *s) | ||
57 | +{ | ||
58 | + /* Release local stack frame. */ | ||
59 | tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK, | ||
60 | - TCG_REG_CALL_STACK, stack_addend, 1); | ||
61 | + TCG_REG_CALL_STACK, STACK_ADDEND, 1); | ||
62 | |||
63 | /* ldmia sp!, { r4 - r11, pc } */ | ||
64 | tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0); | ||
65 | -- | ||
66 | 2.20.1 | ||
67 | |||
68 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <rth@twiddle.net> | ||
2 | 1 | ||
3 | It is, after all, just two instructions. | ||
4 | |||
5 | Profiling on a cortex-a15, using -d nochain to increase the number | ||
6 | of exit_tb that are executed, shows a minor improvement of 0.5%. | ||
7 | |||
8 | Signed-off-by: Richard Henderson <rth@twiddle.net> | ||
9 | --- | ||
10 | tcg/arm/tcg-target.inc.c | 12 ++---------- | ||
11 | 1 file changed, 2 insertions(+), 10 deletions(-) | ||
12 | |||
13 | diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/tcg/arm/tcg-target.inc.c | ||
16 | +++ b/tcg/arm/tcg-target.inc.c | ||
17 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) | ||
18 | #endif | ||
19 | } | ||
20 | |||
21 | -static tcg_insn_unit *tb_ret_addr; | ||
22 | static void tcg_out_epilogue(TCGContext *s); | ||
23 | |||
24 | static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
25 | @@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
26 | |||
27 | switch (opc) { | ||
28 | case INDEX_op_exit_tb: | ||
29 | - /* Reuse the zeroing that exists for goto_ptr. */ | ||
30 | - a0 = args[0]; | ||
31 | - if (a0 == 0) { | ||
32 | - tcg_out_goto(s, COND_AL, s->code_gen_epilogue); | ||
33 | - } else { | ||
34 | - tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]); | ||
35 | - tcg_out_goto(s, COND_AL, tb_ret_addr); | ||
36 | - } | ||
37 | + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, args[0]); | ||
38 | + tcg_out_epilogue(s); | ||
39 | break; | ||
40 | case INDEX_op_goto_tb: | ||
41 | { | ||
42 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s) | ||
43 | */ | ||
44 | s->code_gen_epilogue = s->code_ptr; | ||
45 | tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0); | ||
46 | - tb_ret_addr = s->code_ptr; | ||
47 | tcg_out_epilogue(s); | ||
48 | } | ||
49 | |||
50 | -- | ||
51 | 2.20.1 | ||
52 | |||
53 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Alex Bennée <alex.bennee@linaro.org> | ||
2 | 1 | ||
3 | It's easier to read. | ||
4 | |||
5 | Signed-off-by: Alex Bennée <alex.bennee@linaro.org> | ||
6 | Reviewed-by: Niek Linnenbank <nieklinnenbank@gmail.com> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
9 | Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
10 | Message-Id: <20200228192415.19867-2-alex.bennee@linaro.org> | ||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | accel/tcg/translate-all.c | 19 ++++++++++--------- | ||
14 | 1 file changed, 10 insertions(+), 9 deletions(-) | ||
15 | |||
16 | diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/accel/tcg/translate-all.c | ||
19 | +++ b/accel/tcg/translate-all.c | ||
20 | @@ -XXX,XX +XXX,XX @@ | ||
21 | */ | ||
22 | |||
23 | #include "qemu/osdep.h" | ||
24 | +#include "qemu/units.h" | ||
25 | #include "qemu-common.h" | ||
26 | |||
27 | #define NO_CPU_IO_DEFS | ||
28 | @@ -XXX,XX +XXX,XX @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1, | ||
29 | |||
30 | /* Minimum size of the code gen buffer. This number is randomly chosen, | ||
31 | but not so small that we can't have a fair number of TB's live. */ | ||
32 | -#define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024) | ||
33 | +#define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB) | ||
34 | |||
35 | /* Maximum size of the code gen buffer we'd like to use. Unless otherwise | ||
36 | indicated, this is constrained by the range of direct branches on the | ||
37 | host cpu, as used by the TCG implementation of goto_tb. */ | ||
38 | #if defined(__x86_64__) | ||
39 | -# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024) | ||
40 | +# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) | ||
41 | #elif defined(__sparc__) | ||
42 | -# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024) | ||
43 | +# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) | ||
44 | #elif defined(__powerpc64__) | ||
45 | -# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024) | ||
46 | +# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) | ||
47 | #elif defined(__powerpc__) | ||
48 | -# define MAX_CODE_GEN_BUFFER_SIZE (32u * 1024 * 1024) | ||
49 | +# define MAX_CODE_GEN_BUFFER_SIZE (32 * MiB) | ||
50 | #elif defined(__aarch64__) | ||
51 | -# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024) | ||
52 | +# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) | ||
53 | #elif defined(__s390x__) | ||
54 | /* We have a +- 4GB range on the branches; leave some slop. */ | ||
55 | -# define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024) | ||
56 | +# define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB) | ||
57 | #elif defined(__mips__) | ||
58 | /* We have a 256MB branch region, but leave room to make sure the | ||
59 | main executable is also within that region. */ | ||
60 | -# define MAX_CODE_GEN_BUFFER_SIZE (128ul * 1024 * 1024) | ||
61 | +# define MAX_CODE_GEN_BUFFER_SIZE (128 * MiB) | ||
62 | #else | ||
63 | # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) | ||
64 | #endif | ||
65 | |||
66 | -#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024) | ||
67 | +#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB) | ||
68 | |||
69 | #define DEFAULT_CODE_GEN_BUFFER_SIZE \ | ||
70 | (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \ | ||
71 | -- | ||
72 | 2.20.1 | ||
73 | |||
74 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Alex Bennée <alex.bennee@linaro.org> | ||
2 | 1 | ||
3 | Basing the TB cache size on the ram_size was always a little heuristic | ||
4 | and was broken by a1b18df9a4 which caused ram_size not to be fully | ||
5 | realised at the time we initialise the TCG translation cache. | ||
6 | |||
7 | The current DEFAULT_CODE_GEN_BUFFER_SIZE may still be a little small | ||
8 | but follow-up patches will address that. | ||
9 | |||
10 | Fixes: a1b18df9a4 | ||
11 | Cc: Igor Mammedov <imammedo@redhat.com> | ||
12 | Signed-off-by: Alex Bennée <alex.bennee@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
15 | Reviewed-by: Niek Linnenbank <nieklinnenbank@gmail.com> | ||
16 | Message-Id: <20200228192415.19867-3-alex.bennee@linaro.org> | ||
17 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
18 | --- | ||
19 | accel/tcg/translate-all.c | 8 -------- | ||
20 | 1 file changed, 8 deletions(-) | ||
21 | |||
22 | diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/accel/tcg/translate-all.c | ||
25 | +++ b/accel/tcg/translate-all.c | ||
26 | @@ -XXX,XX +XXX,XX @@ static inline size_t size_code_gen_buffer(size_t tb_size) | ||
27 | { | ||
28 | /* Size the buffer. */ | ||
29 | if (tb_size == 0) { | ||
30 | -#ifdef USE_STATIC_CODE_GEN_BUFFER | ||
31 | tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE; | ||
32 | -#else | ||
33 | - /* ??? Needs adjustments. */ | ||
34 | - /* ??? If we relax the requirement that CONFIG_USER_ONLY use the | ||
35 | - static buffer, we could size this on RESERVED_VA, on the text | ||
36 | - segment size of the executable, or continue to use the default. */ | ||
37 | - tb_size = (unsigned long)(ram_size / 4); | ||
38 | -#endif | ||
39 | } | ||
40 | if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) { | ||
41 | tb_size = MIN_CODE_GEN_BUFFER_SIZE; | ||
42 | -- | ||
43 | 2.20.1 | ||
44 | |||
45 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Alex Bennée <alex.bennee@linaro.org> | ||
2 | 1 | ||
3 | There is no particular reason to use a static codegen buffer on 64 bit | ||
4 | hosts as we have address space to burn. Allow the common CONFIG_USER | ||
5 | case to use the mmap'ed buffers like SoftMMU. | ||
6 | |||
7 | Signed-off-by: Alex Bennée <alex.bennee@linaro.org> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
10 | Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
11 | Reviewed-by: Niek Linnenbank <nieklinnenbank@gmail.com> | ||
12 | Message-Id: <20200228192415.19867-4-alex.bennee@linaro.org> | ||
13 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | --- | ||
15 | accel/tcg/translate-all.c | 11 ++++++----- | ||
16 | 1 file changed, 6 insertions(+), 5 deletions(-) | ||
17 | |||
18 | diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/accel/tcg/translate-all.c | ||
21 | +++ b/accel/tcg/translate-all.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1, | ||
23 | } | ||
24 | } | ||
25 | |||
26 | -#if defined(CONFIG_USER_ONLY) | ||
27 | -/* Currently it is not recommended to allocate big chunks of data in | ||
28 | - user mode. It will change when a dedicated libc will be used. */ | ||
29 | -/* ??? 64-bit hosts ought to have no problem mmaping data outside the | ||
30 | - region in which the guest needs to run. Revisit this. */ | ||
31 | +#if defined(CONFIG_USER_ONLY) && TCG_TARGET_REG_BITS == 32 | ||
32 | +/* | ||
33 | + * For user mode on smaller 32 bit systems we may run into trouble | ||
34 | + * allocating big chunks of data in the right place. On these systems | ||
35 | + * we utilise a static code generation buffer directly in the binary. | ||
36 | + */ | ||
37 | #define USE_STATIC_CODE_GEN_BUFFER | ||
38 | #endif | ||
39 | |||
40 | -- | ||
41 | 2.20.1 | ||
42 | |||
43 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Alex Bennée <alex.bennee@linaro.org> | ||
2 | 1 | ||
3 | While 32 MiB is certainly usable, a full system boot ends up flushing the | ||
4 | codegen buffer nearly 100 times. Increase the default on 64 bit hosts | ||
5 | to take advantage of all that spare memory. After this change I can | ||
6 | boot my test system without any TB flushes. | ||
7 | |||
8 | As we usually run more CONFIG_USER binaries at a time in typical usage | ||
9 | we aren't quite as profligate for user-mode code generation usage. We | ||
10 | also bring the static code gen defines to the same place to keep all | ||
11 | the reasoning in the comments together. | ||
12 | |||
13 | Signed-off-by: Alex Bennée <alex.bennee@linaro.org> | ||
14 | Tested-by: Niek Linnenbank <nieklinnenbank@gmail.com> | ||
15 | Reviewed-by: Niek Linnenbank <nieklinnenbank@gmail.com> | ||
16 | Message-Id: <20200228192415.19867-5-alex.bennee@linaro.org> | ||
17 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
18 | --- | ||
19 | accel/tcg/translate-all.c | 35 ++++++++++++++++++++++++++--------- | ||
20 | 1 file changed, 26 insertions(+), 9 deletions(-) | ||
21 | |||
22 | diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/accel/tcg/translate-all.c | ||
25 | +++ b/accel/tcg/translate-all.c | ||
26 | @@ -XXX,XX +XXX,XX @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1, | ||
27 | } | ||
28 | } | ||
29 | |||
30 | -#if defined(CONFIG_USER_ONLY) && TCG_TARGET_REG_BITS == 32 | ||
31 | -/* | ||
32 | - * For user mode on smaller 32 bit systems we may run into trouble | ||
33 | - * allocating big chunks of data in the right place. On these systems | ||
34 | - * we utilise a static code generation buffer directly in the binary. | ||
35 | - */ | ||
36 | -#define USE_STATIC_CODE_GEN_BUFFER | ||
37 | -#endif | ||
38 | - | ||
39 | /* Minimum size of the code gen buffer. This number is randomly chosen, | ||
40 | but not so small that we can't have a fair number of TB's live. */ | ||
41 | #define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB) | ||
42 | @@ -XXX,XX +XXX,XX @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1, | ||
43 | # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) | ||
44 | #endif | ||
45 | |||
46 | +#if TCG_TARGET_REG_BITS == 32 | ||
47 | #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB) | ||
48 | +#ifdef CONFIG_USER_ONLY | ||
49 | +/* | ||
50 | + * For user mode on smaller 32 bit systems we may run into trouble | ||
51 | + * allocating big chunks of data in the right place. On these systems | ||
52 | + * we utilise a static code generation buffer directly in the binary. | ||
53 | + */ | ||
54 | +#define USE_STATIC_CODE_GEN_BUFFER | ||
55 | +#endif | ||
56 | +#else /* TCG_TARGET_REG_BITS == 64 */ | ||
57 | +#ifdef CONFIG_USER_ONLY | ||
58 | +/* | ||
59 | + * As user-mode emulation typically means running multiple instances | ||
60 | + * of the translator don't go too nuts with our default code gen | ||
61 | + * buffer lest we make things too hard for the OS. | ||
62 | + */ | ||
63 | +#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB) | ||
64 | +#else | ||
65 | +/* | ||
66 | + * We expect most system emulation to run one or two guests per host. | ||
67 | + * Users running large scale system emulation may want to tweak their | ||
68 | + * runtime setup via the tb-size control on the command line. | ||
69 | + */ | ||
70 | +#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB) | ||
71 | +#endif | ||
72 | +#endif | ||
73 | |||
74 | #define DEFAULT_CODE_GEN_BUFFER_SIZE \ | ||
75 | (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \ | ||
76 | -- | ||
77 | 2.20.1 | ||
78 | |||
79 | diff view generated by jsdifflib |