1 | The following changes since commit e0175b71638cf4398903c0d25f93fe62e0606389: | 1 | The following changes since commit eb6490f544388dd24c0d054a96dd304bc7284450: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200228' into staging (2020-02-28 16:39:27 +0000) | 3 | Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200703' into staging (2020-07-04 16:08:41 +0100) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://github.com/rth7680/qemu.git tags/pull-tcg-20200228 | 7 | https://github.com/rth7680/qemu.git tags/pull-tcg-20200706 |
8 | 8 | ||
9 | for you to fetch changes up to 600e17b261555c56a048781b8dd5ba3985650013: | 9 | for you to fetch changes up to 852f933e482518797f7785a2e017a215b88df815: |
10 | 10 | ||
11 | accel/tcg: increase default code gen buffer size for 64 bit (2020-02-28 17:43:31 -0800) | 11 | tcg: Fix do_nonatomic_op_* vs signed operations (2020-07-06 10:58:19 -0700) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Fix race in cpu_exec_step_atomic. | 14 | Fix for ppc shifts |
15 | Work around compile failure with -fno-inline. | 15 | Fix for non-parallel atomic ops |
16 | Expand tcg/arm epilogue inline. | ||
17 | Adjustments to the default code gen buffer size. | ||
18 | 16 | ||
19 | ---------------------------------------------------------------- | 17 | ---------------------------------------------------------------- |
20 | Alex Bennée (5): | 18 | Catherine A. Frederick (1): |
21 | accel/tcg: fix race in cpu_exec_step_atomic (bug 1863025) | 19 | tcg/ppc: Sanitize immediate shifts |
22 | accel/tcg: use units.h for defining code gen buffer sizes | ||
23 | accel/tcg: remove link between guest ram and TCG cache size | ||
24 | accel/tcg: only USE_STATIC_CODE_GEN_BUFFER on 32 bit hosts | ||
25 | accel/tcg: increase default code gen buffer size for 64 bit | ||
26 | 20 | ||
27 | Richard Henderson (2): | 21 | Richard Henderson (1): |
28 | tcg/arm: Split out tcg_out_epilogue | 22 | tcg: Fix do_nonatomic_op_* vs signed operations |
29 | tcg/arm: Expand epilogue inline | ||
30 | 23 | ||
31 | Zenghui Yu (1): | 24 | tcg/ppc/tcg-target.inc.c | 15 ++++++++++----- |
32 | compiler.h: Don't use compile-time assert when __NO_INLINE__ is defined | 25 | tcg/tcg-op.c | 10 ++++++---- |
26 | 2 files changed, 16 insertions(+), 9 deletions(-) | ||
33 | 27 | ||
34 | include/qemu/compiler.h | 2 +- | ||
35 | accel/tcg/cpu-exec.c | 21 ++++++++-------- | ||
36 | accel/tcg/translate-all.c | 61 ++++++++++++++++++++++++++++------------------- | ||
37 | tcg/arm/tcg-target.inc.c | 29 ++++++++++------------ | ||
38 | 4 files changed, 60 insertions(+), 53 deletions(-) | ||
39 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Alex Bennée <alex.bennee@linaro.org> | ||
2 | 1 | ||
3 | The bug describes a race whereby cpu_exec_step_atomic can acquire a TB | ||
4 | which is invalidated by a tb_flush before we execute it. This doesn't | ||
5 | affect the other cpu_exec modes as a tb_flush by its nature can only | ||
6 | occur on a quiescent system. The race was described as: | ||
7 | |||
8 | B2. tcg_cpu_exec => cpu_exec => tb_find => tb_gen_code | ||
9 | B3. tcg_tb_alloc obtains a new TB | ||
10 | |||
11 | C3. TB obtained with tb_lookup__cpu_state or tb_gen_code | ||
12 | (same TB as B2) | ||
13 | |||
14 | A3. start_exclusive critical section entered | ||
15 | A4. do_tb_flush is called, TB memory freed/re-allocated | ||
16 | A5. end_exclusive exits critical section | ||
17 | |||
18 | B2. tcg_cpu_exec => cpu_exec => tb_find => tb_gen_code | ||
19 | B3. tcg_tb_alloc reallocates TB from B2 | ||
20 | |||
21 | C4. start_exclusive critical section entered | ||
22 | C5. cpu_tb_exec executes the TB code that was freed in A4 | ||
23 | |||
24 | The simplest fix is to widen the exclusive period to include the TB | ||
25 | lookup. As a result we can drop the complication of checking we are in | ||
26 | the exclusive region before we end it. | ||
27 | |||
28 | Cc: Yifan <me@yifanlu.com> | ||
29 | Buglink: https://bugs.launchpad.net/qemu/+bug/1863025 | ||
30 | Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> | ||
31 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
32 | Signed-off-by: Alex Bennée <alex.bennee@linaro.org> | ||
33 | Message-Id: <20200214144952.15502-1-alex.bennee@linaro.org> | ||
34 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
35 | --- | ||
36 | accel/tcg/cpu-exec.c | 21 +++++++++++---------- | ||
37 | 1 file changed, 11 insertions(+), 10 deletions(-) | ||
38 | |||
39 | diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/accel/tcg/cpu-exec.c | ||
42 | +++ b/accel/tcg/cpu-exec.c | ||
43 | @@ -XXX,XX +XXX,XX @@ void cpu_exec_step_atomic(CPUState *cpu) | ||
44 | uint32_t cf_mask = cflags & CF_HASH_MASK; | ||
45 | |||
46 | if (sigsetjmp(cpu->jmp_env, 0) == 0) { | ||
47 | + start_exclusive(); | ||
48 | + | ||
49 | tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask); | ||
50 | if (tb == NULL) { | ||
51 | mmap_lock(); | ||
52 | @@ -XXX,XX +XXX,XX @@ void cpu_exec_step_atomic(CPUState *cpu) | ||
53 | mmap_unlock(); | ||
54 | } | ||
55 | |||
56 | - start_exclusive(); | ||
57 | - | ||
58 | /* Since we got here, we know that parallel_cpus must be true. */ | ||
59 | parallel_cpus = false; | ||
60 | cc->cpu_exec_enter(cpu); | ||
61 | @@ -XXX,XX +XXX,XX @@ void cpu_exec_step_atomic(CPUState *cpu) | ||
62 | qemu_plugin_disable_mem_helpers(cpu); | ||
63 | } | ||
64 | |||
65 | - if (cpu_in_exclusive_context(cpu)) { | ||
66 | - /* We might longjump out of either the codegen or the | ||
67 | - * execution, so must make sure we only end the exclusive | ||
68 | - * region if we started it. | ||
69 | - */ | ||
70 | - parallel_cpus = true; | ||
71 | - end_exclusive(); | ||
72 | - } | ||
73 | + | ||
74 | + /* | ||
75 | + * As we start the exclusive region before codegen we must still | ||
76 | + * be in the region if we longjump out of either the codegen or | ||
77 | + * the execution. | ||
78 | + */ | ||
79 | + g_assert(cpu_in_exclusive_context(cpu)); | ||
80 | + parallel_cpus = true; | ||
81 | + end_exclusive(); | ||
82 | } | ||
83 | |||
84 | struct tb_desc { | ||
85 | -- | ||
86 | 2.20.1 | ||
87 | |||
88 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Zenghui Yu <yuzenghui@huawei.com> | ||
2 | 1 | ||
3 | Our robot reported the following compile-time warning while compiling | ||
4 | Qemu with -fno-inline cflags: | ||
5 | |||
6 | In function 'load_memop', | ||
7 | inlined from 'load_helper' at /qemu/accel/tcg/cputlb.c:1578:20, | ||
8 | inlined from 'full_ldub_mmu' at /qemu/accel/tcg/cputlb.c:1624:12: | ||
9 | /qemu/accel/tcg/cputlb.c:1502:9: error: call to 'qemu_build_not_reached' declared with attribute error: code path is reachable | ||
10 | qemu_build_not_reached(); | ||
11 | ^~~~~~~~~~~~~~~~~~~~~~~~ | ||
12 | [...] | ||
13 | |||
14 | It looks like a false-positive because only (MO_UB ^ MO_BSWAP) will | ||
15 | hit the default case in load_memop() while need_swap (size > 1) has | ||
16 | already ensured that MO_UB is not involved. | ||
17 | |||
18 | So the thing is that compilers get confused by the -fno-inline and | ||
19 | just can't accurately evaluate memop_size(op) at compile time, and | ||
20 | then the qemu_build_not_reached() is wrongly triggered by (MO_UB ^ | ||
21 | MO_BSWAP). Let's avoid using the compile-time assert when | ||
22 | no functions will be inlined into their callers. | ||
23 | |||
24 | Reported-by: Euler Robot <euler.robot@huawei.com> | ||
25 | Suggested-by: Richard Henderson <richard.henderson@linaro.org> | ||
26 | Signed-off-by: Zenghui Yu <yuzenghui@huawei.com> | ||
27 | Message-Id: <20200205141545.180-1-yuzenghui@huawei.com> | ||
28 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
29 | --- | ||
30 | include/qemu/compiler.h | 2 +- | ||
31 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
32 | |||
33 | diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/include/qemu/compiler.h | ||
36 | +++ b/include/qemu/compiler.h | ||
37 | @@ -XXX,XX +XXX,XX @@ | ||
38 | * supports QEMU_ERROR, this will be reported at compile time; otherwise | ||
39 | * this will be reported at link time due to the missing symbol. | ||
40 | */ | ||
41 | -#ifdef __OPTIMIZE__ | ||
42 | +#if defined(__OPTIMIZE__) && !defined(__NO_INLINE__) | ||
43 | extern void QEMU_NORETURN QEMU_ERROR("code path is reachable") | ||
44 | qemu_build_not_reached(void); | ||
45 | #else | ||
46 | -- | ||
47 | 2.20.1 | ||
48 | |||
49 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <rth@twiddle.net> | ||
2 | 1 | ||
3 | We will shortly use this function from tcg_out_op as well. | ||
4 | |||
5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
6 | Signed-off-by: Richard Henderson <rth@twiddle.net> | ||
7 | --- | ||
8 | tcg/arm/tcg-target.inc.c | 19 +++++++++++-------- | ||
9 | 1 file changed, 11 insertions(+), 8 deletions(-) | ||
10 | |||
11 | diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/tcg/arm/tcg-target.inc.c | ||
14 | +++ b/tcg/arm/tcg-target.inc.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) | ||
16 | } | ||
17 | |||
18 | static tcg_insn_unit *tb_ret_addr; | ||
19 | +static void tcg_out_epilogue(TCGContext *s); | ||
20 | |||
21 | static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
22 | const TCGArg *args, const int *const_args) | ||
23 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count) | ||
24 | + TCG_TARGET_STACK_ALIGN - 1) \ | ||
25 | & -TCG_TARGET_STACK_ALIGN) | ||
26 | |||
27 | +#define STACK_ADDEND (FRAME_SIZE - PUSH_SIZE) | ||
28 | + | ||
29 | static void tcg_target_qemu_prologue(TCGContext *s) | ||
30 | { | ||
31 | - int stack_addend; | ||
32 | - | ||
33 | /* Calling convention requires us to save r4-r11 and lr. */ | ||
34 | /* stmdb sp!, { r4 - r11, lr } */ | ||
35 | tcg_out32(s, (COND_AL << 28) | 0x092d4ff0); | ||
36 | |||
37 | /* Reserve callee argument and tcg temp space. */ | ||
38 | - stack_addend = FRAME_SIZE - PUSH_SIZE; | ||
39 | - | ||
40 | tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK, | ||
41 | - TCG_REG_CALL_STACK, stack_addend, 1); | ||
42 | + TCG_REG_CALL_STACK, STACK_ADDEND, 1); | ||
43 | tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, | ||
44 | CPU_TEMP_BUF_NLONGS * sizeof(long)); | ||
45 | |||
46 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s) | ||
47 | */ | ||
48 | s->code_gen_epilogue = s->code_ptr; | ||
49 | tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0); | ||
50 | - | ||
51 | - /* TB epilogue */ | ||
52 | tb_ret_addr = s->code_ptr; | ||
53 | + tcg_out_epilogue(s); | ||
54 | +} | ||
55 | + | ||
56 | +static void tcg_out_epilogue(TCGContext *s) | ||
57 | +{ | ||
58 | + /* Release local stack frame. */ | ||
59 | tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK, | ||
60 | - TCG_REG_CALL_STACK, stack_addend, 1); | ||
61 | + TCG_REG_CALL_STACK, STACK_ADDEND, 1); | ||
62 | |||
63 | /* ldmia sp!, { r4 - r11, pc } */ | ||
64 | tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0); | ||
65 | -- | ||
66 | 2.20.1 | ||
67 | |||
68 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Richard Henderson <rth@twiddle.net> | ||
2 | 1 | ||
3 | It is, after all, just two instructions. | ||
4 | |||
5 | Profiling on a cortex-a15, using -d nochain to increase the number | ||
6 | of exit_tb that are executed, shows a minor improvement of 0.5%. | ||
7 | |||
8 | Signed-off-by: Richard Henderson <rth@twiddle.net> | ||
9 | --- | ||
10 | tcg/arm/tcg-target.inc.c | 12 ++---------- | ||
11 | 1 file changed, 2 insertions(+), 10 deletions(-) | ||
12 | |||
13 | diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/tcg/arm/tcg-target.inc.c | ||
16 | +++ b/tcg/arm/tcg-target.inc.c | ||
17 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) | ||
18 | #endif | ||
19 | } | ||
20 | |||
21 | -static tcg_insn_unit *tb_ret_addr; | ||
22 | static void tcg_out_epilogue(TCGContext *s); | ||
23 | |||
24 | static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
25 | @@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
26 | |||
27 | switch (opc) { | ||
28 | case INDEX_op_exit_tb: | ||
29 | - /* Reuse the zeroing that exists for goto_ptr. */ | ||
30 | - a0 = args[0]; | ||
31 | - if (a0 == 0) { | ||
32 | - tcg_out_goto(s, COND_AL, s->code_gen_epilogue); | ||
33 | - } else { | ||
34 | - tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]); | ||
35 | - tcg_out_goto(s, COND_AL, tb_ret_addr); | ||
36 | - } | ||
37 | + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, args[0]); | ||
38 | + tcg_out_epilogue(s); | ||
39 | break; | ||
40 | case INDEX_op_goto_tb: | ||
41 | { | ||
42 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s) | ||
43 | */ | ||
44 | s->code_gen_epilogue = s->code_ptr; | ||
45 | tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0); | ||
46 | - tb_ret_addr = s->code_ptr; | ||
47 | tcg_out_epilogue(s); | ||
48 | } | ||
49 | |||
50 | -- | ||
51 | 2.20.1 | ||
52 | |||
53 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Alex Bennée <alex.bennee@linaro.org> | ||
2 | 1 | ||
3 | It's easier to read. | ||
4 | |||
5 | Signed-off-by: Alex Bennée <alex.bennee@linaro.org> | ||
6 | Reviewed-by: Niek Linnenbank <nieklinnenbank@gmail.com> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
9 | Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
10 | Message-Id: <20200228192415.19867-2-alex.bennee@linaro.org> | ||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | accel/tcg/translate-all.c | 19 ++++++++++--------- | ||
14 | 1 file changed, 10 insertions(+), 9 deletions(-) | ||
15 | |||
16 | diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/accel/tcg/translate-all.c | ||
19 | +++ b/accel/tcg/translate-all.c | ||
20 | @@ -XXX,XX +XXX,XX @@ | ||
21 | */ | ||
22 | |||
23 | #include "qemu/osdep.h" | ||
24 | +#include "qemu/units.h" | ||
25 | #include "qemu-common.h" | ||
26 | |||
27 | #define NO_CPU_IO_DEFS | ||
28 | @@ -XXX,XX +XXX,XX @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1, | ||
29 | |||
30 | /* Minimum size of the code gen buffer. This number is randomly chosen, | ||
31 | but not so small that we can't have a fair number of TB's live. */ | ||
32 | -#define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024) | ||
33 | +#define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB) | ||
34 | |||
35 | /* Maximum size of the code gen buffer we'd like to use. Unless otherwise | ||
36 | indicated, this is constrained by the range of direct branches on the | ||
37 | host cpu, as used by the TCG implementation of goto_tb. */ | ||
38 | #if defined(__x86_64__) | ||
39 | -# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024) | ||
40 | +# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) | ||
41 | #elif defined(__sparc__) | ||
42 | -# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024) | ||
43 | +# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) | ||
44 | #elif defined(__powerpc64__) | ||
45 | -# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024) | ||
46 | +# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) | ||
47 | #elif defined(__powerpc__) | ||
48 | -# define MAX_CODE_GEN_BUFFER_SIZE (32u * 1024 * 1024) | ||
49 | +# define MAX_CODE_GEN_BUFFER_SIZE (32 * MiB) | ||
50 | #elif defined(__aarch64__) | ||
51 | -# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024) | ||
52 | +# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) | ||
53 | #elif defined(__s390x__) | ||
54 | /* We have a +- 4GB range on the branches; leave some slop. */ | ||
55 | -# define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024) | ||
56 | +# define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB) | ||
57 | #elif defined(__mips__) | ||
58 | /* We have a 256MB branch region, but leave room to make sure the | ||
59 | main executable is also within that region. */ | ||
60 | -# define MAX_CODE_GEN_BUFFER_SIZE (128ul * 1024 * 1024) | ||
61 | +# define MAX_CODE_GEN_BUFFER_SIZE (128 * MiB) | ||
62 | #else | ||
63 | # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) | ||
64 | #endif | ||
65 | |||
66 | -#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024) | ||
67 | +#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB) | ||
68 | |||
69 | #define DEFAULT_CODE_GEN_BUFFER_SIZE \ | ||
70 | (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \ | ||
71 | -- | ||
72 | 2.20.1 | ||
73 | |||
74 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Alex Bennée <alex.bennee@linaro.org> | ||
2 | 1 | ||
3 | Basing the TB cache size on the ram_size was always a little heuristic | ||
4 | and was broken by a1b18df9a4 which caused ram_size not to be fully | ||
5 | realised at the time we initialise the TCG translation cache. | ||
6 | |||
7 | The current DEFAULT_CODE_GEN_BUFFER_SIZE may still be a little small | ||
8 | but follow-up patches will address that. | ||
9 | |||
10 | Fixes: a1b18df9a4 | ||
11 | Cc: Igor Mammedov <imammedo@redhat.com> | ||
12 | Signed-off-by: Alex Bennée <alex.bennee@linaro.org> | ||
13 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
15 | Reviewed-by: Niek Linnenbank <nieklinnenbank@gmail.com> | ||
16 | Message-Id: <20200228192415.19867-3-alex.bennee@linaro.org> | ||
17 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
18 | --- | ||
19 | accel/tcg/translate-all.c | 8 -------- | ||
20 | 1 file changed, 8 deletions(-) | ||
21 | |||
22 | diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/accel/tcg/translate-all.c | ||
25 | +++ b/accel/tcg/translate-all.c | ||
26 | @@ -XXX,XX +XXX,XX @@ static inline size_t size_code_gen_buffer(size_t tb_size) | ||
27 | { | ||
28 | /* Size the buffer. */ | ||
29 | if (tb_size == 0) { | ||
30 | -#ifdef USE_STATIC_CODE_GEN_BUFFER | ||
31 | tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE; | ||
32 | -#else | ||
33 | - /* ??? Needs adjustments. */ | ||
34 | - /* ??? If we relax the requirement that CONFIG_USER_ONLY use the | ||
35 | - static buffer, we could size this on RESERVED_VA, on the text | ||
36 | - segment size of the executable, or continue to use the default. */ | ||
37 | - tb_size = (unsigned long)(ram_size / 4); | ||
38 | -#endif | ||
39 | } | ||
40 | if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) { | ||
41 | tb_size = MIN_CODE_GEN_BUFFER_SIZE; | ||
42 | -- | ||
43 | 2.20.1 | ||
44 | |||
45 | diff view generated by jsdifflib |
1 | From: Alex Bennée <alex.bennee@linaro.org> | 1 | From: "Catherine A. Frederick" <chocola@animebitch.es> |
---|---|---|---|
2 | 2 | ||
3 | While 32mb is certainly usable a full system boot ends up flushing the | 3 | Sanitize shift constants so that shift operations with |
4 | codegen buffer nearly 100 times. Increase the default on 64 bit hosts | 4 | large constants don't generate invalid instructions. |
5 | to take advantage of all that spare memory. After this change I can | ||
6 | boot my test system without any TB flushes. | ||
7 | 5 | ||
8 | As we usually run more CONFIG_USER binaries at a time in typical usage | 6 | Signed-off-by: Catherine A. Frederick <chocola@animebitch.es> |
9 | we aren't quite as profligate for user-mode code generation usage. We | 7 | Message-Id: <20200607211100.22858-1-agrecascino123@gmail.com> |
10 | also bring the static code gen defines to the same place to keep all | ||
11 | the reasoning in the comments together. | ||
12 | |||
13 | Signed-off-by: Alex Bennée <alex.bennee@linaro.org> | ||
14 | Tested-by: Niek Linnenbank <nieklinnenbank@gmail.com> | ||
15 | Reviewed-by: Niek Linnenbank <nieklinnenbank@gmail.com> | ||
16 | Message-Id: <20200228192415.19867-5-alex.bennee@linaro.org> | ||
17 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
18 | --- | 9 | --- |
19 | accel/tcg/translate-all.c | 35 ++++++++++++++++++++++++++--------- | 10 | tcg/ppc/tcg-target.inc.c | 15 ++++++++++----- |
20 | 1 file changed, 26 insertions(+), 9 deletions(-) | 11 | 1 file changed, 10 insertions(+), 5 deletions(-) |
21 | 12 | ||
22 | diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c | 13 | diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c |
23 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
24 | --- a/accel/tcg/translate-all.c | 15 | --- a/tcg/ppc/tcg-target.inc.c |
25 | +++ b/accel/tcg/translate-all.c | 16 | +++ b/tcg/ppc/tcg-target.inc.c |
26 | @@ -XXX,XX +XXX,XX @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1, | 17 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, |
27 | } | 18 | |
28 | } | 19 | case INDEX_op_shl_i32: |
29 | 20 | if (const_args[2]) { | |
30 | -#if defined(CONFIG_USER_ONLY) && TCG_TARGET_REG_BITS == 32 | 21 | - tcg_out_shli32(s, args[0], args[1], args[2]); |
31 | -/* | 22 | + /* Limit immediate shift count lest we create an illegal insn. */ |
32 | - * For user mode on smaller 32 bit systems we may run into trouble | 23 | + tcg_out_shli32(s, args[0], args[1], args[2] & 31); |
33 | - * allocating big chunks of data in the right place. On these systems | 24 | } else { |
34 | - * we utilise a static code generation buffer directly in the binary. | 25 | tcg_out32(s, SLW | SAB(args[1], args[0], args[2])); |
35 | - */ | 26 | } |
36 | -#define USE_STATIC_CODE_GEN_BUFFER | 27 | break; |
37 | -#endif | 28 | case INDEX_op_shr_i32: |
38 | - | 29 | if (const_args[2]) { |
39 | /* Minimum size of the code gen buffer. This number is randomly chosen, | 30 | - tcg_out_shri32(s, args[0], args[1], args[2]); |
40 | but not so small that we can't have a fair number of TB's live. */ | 31 | + /* Limit immediate shift count lest we create an illegal insn. */ |
41 | #define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB) | 32 | + tcg_out_shri32(s, args[0], args[1], args[2] & 31); |
42 | @@ -XXX,XX +XXX,XX @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1, | 33 | } else { |
43 | # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) | 34 | tcg_out32(s, SRW | SAB(args[1], args[0], args[2])); |
44 | #endif | 35 | } |
45 | 36 | break; | |
46 | +#if TCG_TARGET_REG_BITS == 32 | 37 | case INDEX_op_sar_i32: |
47 | #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB) | 38 | if (const_args[2]) { |
48 | +#ifdef CONFIG_USER_ONLY | 39 | - tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2])); |
49 | +/* | 40 | + /* Limit immediate shift count lest we create an illegal insn. */ |
50 | + * For user mode on smaller 32 bit systems we may run into trouble | 41 | + tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2] & 31)); |
51 | + * allocating big chunks of data in the right place. On these systems | 42 | } else { |
52 | + * we utilise a static code generation buffer directly in the binary. | 43 | tcg_out32(s, SRAW | SAB(args[1], args[0], args[2])); |
53 | + */ | 44 | } |
54 | +#define USE_STATIC_CODE_GEN_BUFFER | 45 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, |
55 | +#endif | 46 | |
56 | +#else /* TCG_TARGET_REG_BITS == 64 */ | 47 | case INDEX_op_shl_i64: |
57 | +#ifdef CONFIG_USER_ONLY | 48 | if (const_args[2]) { |
58 | +/* | 49 | - tcg_out_shli64(s, args[0], args[1], args[2]); |
59 | + * As user-mode emulation typically means running multiple instances | 50 | + /* Limit immediate shift count lest we create an illegal insn. */ |
60 | + * of the translator don't go too nuts with our default code gen | 51 | + tcg_out_shli64(s, args[0], args[1], args[2] & 63); |
61 | + * buffer lest we make things too hard for the OS. | 52 | } else { |
62 | + */ | 53 | tcg_out32(s, SLD | SAB(args[1], args[0], args[2])); |
63 | +#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB) | 54 | } |
64 | +#else | 55 | break; |
65 | +/* | 56 | case INDEX_op_shr_i64: |
66 | + * We expect most system emulation to run one or two guests per host. | 57 | if (const_args[2]) { |
67 | + * Users running large scale system emulation may want to tweak their | 58 | - tcg_out_shri64(s, args[0], args[1], args[2]); |
68 | + * runtime setup via the tb-size control on the command line. | 59 | + /* Limit immediate shift count lest we create an illegal insn. */ |
69 | + */ | 60 | + tcg_out_shri64(s, args[0], args[1], args[2] & 63); |
70 | +#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB) | 61 | } else { |
71 | +#endif | 62 | tcg_out32(s, SRD | SAB(args[1], args[0], args[2])); |
72 | +#endif | 63 | } |
73 | |||
74 | #define DEFAULT_CODE_GEN_BUFFER_SIZE \ | ||
75 | (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \ | ||
76 | -- | 64 | -- |
77 | 2.20.1 | 65 | 2.25.1 |
78 | 66 | ||
79 | 67 | diff view generated by jsdifflib |
1 | From: Alex Bennée <alex.bennee@linaro.org> | 1 | The smin/smax/umin/umax operations require the operands to be |
---|---|---|---|
2 | properly sign extended. Do not drop the MO_SIGN bit from the | ||
3 | load, and additionally extend the val input. | ||
2 | 4 | ||
3 | There is no particular reason to use a static codegen buffer on 64 bit | 5 | Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com> |
4 | hosts as we have address space to burn. Allow the common CONFIG_USER | 6 | Reported-by: LIU Zhiwei <zhiwei_liu@c-sky.com> |
5 | case to use the mmap'ed buffers like SoftMMU. | 7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | Message-Id: <20200701165646.1901320-1-richard.henderson@linaro.org> | ||
9 | --- | ||
10 | tcg/tcg-op.c | 10 ++++++---- | ||
11 | 1 file changed, 6 insertions(+), 4 deletions(-) | ||
6 | 12 | ||
7 | Signed-off-by: Alex Bennée <alex.bennee@linaro.org> | 13 | diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c |
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
10 | Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
11 | Reviewed-by: Niek Linnenbank <nieklinnenbank@gmail.com> | ||
12 | Message-Id: <20200228192415.19867-4-alex.bennee@linaro.org> | ||
13 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | --- | ||
15 | accel/tcg/translate-all.c | 11 ++++++----- | ||
16 | 1 file changed, 6 insertions(+), 5 deletions(-) | ||
17 | |||
18 | diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/accel/tcg/translate-all.c | 15 | --- a/tcg/tcg-op.c |
21 | +++ b/accel/tcg/translate-all.c | 16 | +++ b/tcg/tcg-op.c |
22 | @@ -XXX,XX +XXX,XX @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1, | 17 | @@ -XXX,XX +XXX,XX @@ static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val, |
23 | } | 18 | |
24 | } | 19 | memop = tcg_canonicalize_memop(memop, 0, 0); |
25 | 20 | ||
26 | -#if defined(CONFIG_USER_ONLY) | 21 | - tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN); |
27 | -/* Currently it is not recommended to allocate big chunks of data in | 22 | - gen(t2, t1, val); |
28 | - user mode. It will change when a dedicated libc will be used. */ | 23 | + tcg_gen_qemu_ld_i32(t1, addr, idx, memop); |
29 | -/* ??? 64-bit hosts ought to have no problem mmaping data outside the | 24 | + tcg_gen_ext_i32(t2, val, memop); |
30 | - region in which the guest needs to run. Revisit this. */ | 25 | + gen(t2, t1, t2); |
31 | +#if defined(CONFIG_USER_ONLY) && TCG_TARGET_REG_BITS == 32 | 26 | tcg_gen_qemu_st_i32(t2, addr, idx, memop); |
32 | +/* | 27 | |
33 | + * For user mode on smaller 32 bit systems we may run into trouble | 28 | tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop); |
34 | + * allocating big chunks of data in the right place. On these systems | 29 | @@ -XXX,XX +XXX,XX @@ static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val, |
35 | + * we utilise a static code generation buffer directly in the binary. | 30 | |
36 | + */ | 31 | memop = tcg_canonicalize_memop(memop, 1, 0); |
37 | #define USE_STATIC_CODE_GEN_BUFFER | 32 | |
38 | #endif | 33 | - tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN); |
39 | 34 | - gen(t2, t1, val); | |
35 | + tcg_gen_qemu_ld_i64(t1, addr, idx, memop); | ||
36 | + tcg_gen_ext_i64(t2, val, memop); | ||
37 | + gen(t2, t1, t2); | ||
38 | tcg_gen_qemu_st_i64(t2, addr, idx, memop); | ||
39 | |||
40 | tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop); | ||
40 | -- | 41 | -- |
41 | 2.20.1 | 42 | 2.25.1 |
42 | 43 | ||
43 | 44 | diff view generated by jsdifflib |