v2: Fix FreeBSD build error in patch 18.

r~

New cover letter (tags/pull-tcg-20231018-2):

The following changes since commit 0d239e513e0117e66fa739fb71a43b9383a108ff:

  Merge tag 'pull-lu-20231018' of https://gitlab.com/rth7680/qemu into staging (2023-10-19 10:20:57 -0700)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20231018-2

for you to fetch changes up to a75f704d972b9408f5e2843784b3add48c724c52:

  target/i386: Use i128 for 128 and 256-bit loads and stores (2023-10-19 21:11:44 -0700)

----------------------------------------------------------------
tcg: Drop unused tcg_temp_free define
tcg: Introduce tcg_use_softmmu
tcg: Optimize past conditional branches
tcg: Use constant zero when expanding with divu2
tcg/ppc: Enable direct branching tcg_out_goto_tb with TCG_REG_TB
tcg/ppc: Use ADDPCIS for power9
tcg/ppc: Use prefixed instructions for power10
tcg/ppc: Disable TCG_REG_TB for Power9/Power10

----------------------------------------------------------------
Jordan Niethe (1):
      tcg/ppc: Enable direct branching tcg_out_goto_tb with TCG_REG_TB

Mike Frysinger (1):
      tcg: drop unused tcg_temp_free define

Richard Henderson (27):
      tcg/ppc: Untabify tcg-target.c.inc
      tcg/ppc: Reinterpret tb-relative to TB+4
      tcg/ppc: Use ADDPCIS in tcg_out_tb_start
      tcg/ppc: Use ADDPCIS in tcg_out_movi_int
      tcg/ppc: Use ADDPCIS for the constant pool
      tcg/ppc: Use ADDPCIS in tcg_out_goto_tb
      tcg/ppc: Use PADDI in tcg_out_movi
      tcg/ppc: Use prefixed instructions in tcg_out_mem_long
      tcg/ppc: Use PLD in tcg_out_movi for constant pool
      tcg/ppc: Use prefixed instructions in tcg_out_dupi_vec
      tcg/ppc: Use PLD in tcg_out_goto_tb
      tcg/ppc: Disable TCG_REG_TB for Power9/Power10
      tcg: Introduce tcg_use_softmmu
      tcg: Provide guest_base fallback for system mode
      tcg/arm: Use tcg_use_softmmu
      tcg/aarch64: Use tcg_use_softmmu
      tcg/i386: Use tcg_use_softmmu
      tcg/loongarch64: Use tcg_use_softmmu
      tcg/mips: Use tcg_use_softmmu
      tcg/ppc: Use tcg_use_softmmu
      tcg/riscv: Do not reserve TCG_GUEST_BASE_REG for guest_base zero
      tcg/riscv: Use tcg_use_softmmu
      tcg/s390x: Use tcg_use_softmmu
      tcg: Use constant zero when expanding with divu2
      tcg: Optimize past conditional branches
      tcg: Add tcg_gen_{ld,st}_i128
      target/i386: Use i128 for 128 and 256-bit loads and stores

 include/tcg/tcg-op-common.h      |   3 +
 include/tcg/tcg-op.h             |   2 -
 include/tcg/tcg.h                |   8 +-
 target/i386/tcg/translate.c      |  63 ++---
 tcg/optimize.c                   |   8 +-
 tcg/tcg-op-ldst.c                |  14 +-
 tcg/tcg-op.c                     |  38 ++-
 tcg/tcg.c                        |  13 +-
 tcg/aarch64/tcg-target.c.inc     | 177 ++++------
 tcg/arm/tcg-target.c.inc         | 203 +++++++-------
 tcg/i386/tcg-target.c.inc        | 198 +++++++-------
 tcg/loongarch64/tcg-target.c.inc | 126 +++++----
 tcg/mips/tcg-target.c.inc        | 231 ++++++++--------
 tcg/ppc/tcg-target.c.inc         | 561 ++++++++++++++++++++++++++-------------
 tcg/riscv/tcg-target.c.inc       | 189 ++++-------
 tcg/s390x/tcg-target.c.inc       | 161 ++++-----
 16 files changed, 1102 insertions(+), 893 deletions(-)

Old cover letter (tags/pull-tcg-20230724), shown for comparison:

The following changes since commit d1181d29370a4318a9f11ea92065bea6bb159f83:

  Merge tag 'pull-nbd-2023-07-19' of https://repo.or.cz/qemu/ericb into staging (2023-07-20 09:54:07 +0100)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230724

for you to fetch changes up to 32b120394c578bc824f1db4835b3bffbeca88fae:

  accel/tcg: Fix type of 'last' for pageflags_{find,next} (2023-07-24 09:48:49 +0100)

----------------------------------------------------------------
accel/tcg: Zero-pad vaddr in tlb debug output
accel/tcg: Fix type of 'last' for pageflags_{find,next}
accel/tcg: Fix sense of read-only probes in ldst_atomicity
accel/tcg: Take mmap_lock in load_atomic*_or_exit
tcg: Add earlyclobber to op_add2 for x86 and s390x
tcg/ppc: Fix race in goto_tb implementation

----------------------------------------------------------------
Anton Johansson (1):
      accel/tcg: Zero-pad vaddr in tlb_debug output

Ilya Leoshkevich (1):
      tcg/{i386, s390x}: Add earlyclobber to the op_add2's first output

Jordan Niethe (1):
      tcg/ppc: Fix race in goto_tb implementation

Luca Bonissi (1):
      accel/tcg: Fix type of 'last' for pageflags_{find,next}

Richard Henderson (3):
      include/exec: Add WITH_MMAP_LOCK_GUARD
      accel/tcg: Fix sense of read-only probes in ldst_atomicity
      accel/tcg: Take mmap_lock in load_atomic*_or_exit

 include/exec/exec-all.h        | 10 ++++++++++
 tcg/i386/tcg-target-con-set.h  |  5 ++++-
 tcg/s390x/tcg-target-con-set.h |  8 +++++---
 accel/tcg/cputlb.c             | 20 ++++++++++----------
 accel/tcg/user-exec.c          |  4 ++--
 bsd-user/mmap.c                |  1 +
 linux-user/mmap.c              |  1 +
 tcg/tcg.c                      |  8 +++++++-
 accel/tcg/ldst_atomicity.c.inc | 32 ++++++++++++++++++--------------
 tcg/i386/tcg-target.c.inc      |  2 +-
 tcg/ppc/tcg-target.c.inc       |  9 +++++----
 tcg/s390x/tcg-target.c.inc     |  4 ++--
 12 files changed, 66 insertions(+), 38 deletions(-)
Deleted patch | |||
1 | From: Jordan Niethe <jniethe5@gmail.com> | ||
2 | 1 | ||
3 | Commit 20b6643324 ("tcg/ppc: Reorg goto_tb implementation") modified | ||
4 | goto_tb to ensure only a single instruction was patched to prevent | ||
5 | incorrect behavior if a thread was in the middle of multiple | ||
6 | instructions when they were replaced. However this introduced a race | ||
7 | between loading the jmp target into TCG_REG_TB and patching and | ||
8 | executing the direct branch. | ||
9 | |||
10 | The relevant part of the goto_tb implementation: | ||
11 | |||
12 | ld TCG_REG_TB, TARGET_ADDR_LOCATION(TCG_REG_TB) | ||
13 | patch_location: | ||
14 | mtctr TCG_REG_TB | ||
15 | bctr | ||
16 | |||
17 | tb_target_set_jmp_target() will replace 'patch_location' with a direct | ||
18 | branch if the target is in range. The direct branch now relies on | ||
19 | TCG_REG_TB being set up correctly by the ld. Prior to this commit | ||
20 | multiple instructions were patched in for the direct branch case; these | ||
21 | instructions would initialize TCG_REG_TB to the same value as the branch | ||
22 | target. | ||
23 | |||
24 | Imagine the following sequence: | ||
25 | |||
26 | 1) Thread A is executing the goto_tb sequence and loads the jmp | ||
27 | target into TCG_REG_TB. | ||
28 | |||
29 | 2) Thread B updates the jmp target address and calls | ||
30 | tb_target_set_jmp_target(). This patches a new direct branch into the | ||
31 | goto_tb sequence. | ||
32 | |||
33 | 3) Thread A executes the newly patched direct branch. The value in | ||
34 | TCG_REG_TB still contains the old jmp target. | ||
35 | |||
36 | TCG_REG_TB MUST contain the translation block's tc.ptr. Execution will | ||
37 | eventually crash after performing memory accesses generated from a | ||
38 | faulty value in TCG_REG_TB. | ||
39 | |||
40 | This presents as segfaults or illegal instruction exceptions. | ||
41 | |||
42 | Do not revert commit 20b6643324 as it did fix a different race | ||
43 | condition. Instead remove the direct branch optimization and always use | ||
44 | indirect branches. | ||
45 | |||
46 | The direct branch optimization can be re-added later with a race free | ||
47 | sequence. | ||
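As background on why the patch window must be a single instruction: swapping
in a direct branch is only safe when the replacement is one naturally aligned,
instruction-sized store followed by an instruction-cache flush. A rough,
hypothetical C sketch of that idiom (illustration only, not QEMU's actual
patching helper):

    #include <stdint.h>

    /*
     * Overwrite one 32-bit instruction word that other threads may be
     * executing concurrently.  A single aligned atomic store means a
     * concurrent thread sees either the old or the new instruction,
     * never a torn mixture of the two.
     */
    static void patch_one_insn(uint32_t *patch_location, uint32_t new_insn)
    {
        __atomic_store_n(patch_location, new_insn, __ATOMIC_RELEASE);
        /* Make sure the executing CPUs refetch the patched word. */
        __builtin___clear_cache((char *)patch_location,
                                (char *)(patch_location + 1));
    }

Note that the race described above is not about torn instructions: even a
perfectly atomic patch leaves thread A holding a stale TCG_REG_TB, which is
why the direct-branch optimization is removed here rather than patched
differently.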
48 | |||
49 | Fixes: 20b6643324 ("tcg/ppc: Reorg goto_tb implementation") | ||
50 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1726 | ||
51 | Reported-by: Anushree Mathur <anushree.mathur@linux.vnet.ibm.com> | ||
52 | Tested-by: Anushree Mathur <anushree.mathur@linux.vnet.ibm.com> | ||
53 | Tested-by: Michael Tokarev <mjt@tls.msk.ru> | ||
54 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
55 | Co-developed-by: Benjamin Gray <bgray@linux.ibm.com> | ||
56 | Signed-off-by: Jordan Niethe <jniethe5@gmail.com> | ||
57 | Signed-off-by: Benjamin Gray <bgray@linux.ibm.com> | ||
58 | Message-Id: <20230717093001.13167-1-jniethe5@gmail.com> | ||
59 | --- | ||
60 | tcg/ppc/tcg-target.c.inc | 9 +++++---- | ||
61 | 1 file changed, 5 insertions(+), 4 deletions(-) | ||
62 | |||
63 | diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc | ||
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/tcg/ppc/tcg-target.c.inc | ||
66 | +++ b/tcg/ppc/tcg-target.c.inc | ||
67 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_tb(TCGContext *s, int which) | ||
68 | ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr); | ||
69 | tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset); | ||
70 | |||
71 | - /* Direct branch will be patched by tb_target_set_jmp_target. */ | ||
72 | + /* TODO: Use direct branches when possible. */ | ||
73 | set_jmp_insn_offset(s, which); | ||
74 | tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR); | ||
75 | |||
76 | - /* When branch is out of range, fall through to indirect. */ | ||
77 | tcg_out32(s, BCCTR | BO_ALWAYS); | ||
78 | |||
79 | /* For the unlinked case, need to reset TCG_REG_TB. */ | ||
80 | @@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, | ||
81 | intptr_t diff = addr - jmp_rx; | ||
82 | tcg_insn_unit insn; | ||
83 | |||
84 | + if (USE_REG_TB) { | ||
85 | + return; | ||
86 | + } | ||
87 | + | ||
88 | if (in_range_b(diff)) { | ||
89 | insn = B | (diff & 0x3fffffc); | ||
90 | - } else if (USE_REG_TB) { | ||
91 | - insn = MTSPR | RS(TCG_REG_TB) | CTR; | ||
92 | } else { | ||
93 | insn = NOP; | ||
94 | } | ||
95 | -- | ||
96 | 2.34.1
Deleted patch | |||
1 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
3 | --- | ||
4 | include/exec/exec-all.h | 10 ++++++++++ | ||
5 | bsd-user/mmap.c | 1 + | ||
6 | linux-user/mmap.c | 1 + | ||
7 | 3 files changed, 12 insertions(+) | ||
8 | 1 | ||
9 | diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/include/exec/exec-all.h | ||
12 | +++ b/include/exec/exec-all.h | ||
13 | @@ -XXX,XX +XXX,XX @@ void TSA_NO_TSA mmap_lock(void); | ||
14 | void TSA_NO_TSA mmap_unlock(void); | ||
15 | bool have_mmap_lock(void); | ||
16 | |||
17 | +static inline void mmap_unlock_guard(void *unused) | ||
18 | +{ | ||
19 | + mmap_unlock(); | ||
20 | +} | ||
21 | + | ||
22 | +#define WITH_MMAP_LOCK_GUARD() \ | ||
23 | + for (int _mmap_lock_iter __attribute__((cleanup(mmap_unlock_guard))) \ | ||
24 | + = (mmap_lock(), 0); _mmap_lock_iter == 0; _mmap_lock_iter = 1) | ||
25 | + | ||
26 | /** | ||
27 | * adjust_signal_pc: | ||
28 | * @pc: raw pc from the host signal ucontext_t. | ||
29 | @@ -XXX,XX +XXX,XX @@ G_NORETURN void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr, | ||
30 | #else | ||
31 | static inline void mmap_lock(void) {} | ||
32 | static inline void mmap_unlock(void) {} | ||
33 | +#define WITH_MMAP_LOCK_GUARD() | ||
34 | |||
35 | void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length); | ||
36 | void tlb_set_dirty(CPUState *cpu, vaddr addr); | ||
37 | diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/bsd-user/mmap.c | ||
40 | +++ b/bsd-user/mmap.c | ||
41 | @@ -XXX,XX +XXX,XX @@ void mmap_lock(void) | ||
42 | |||
43 | void mmap_unlock(void) | ||
44 | { | ||
45 | + assert(mmap_lock_count > 0); | ||
46 | if (--mmap_lock_count == 0) { | ||
47 | pthread_mutex_unlock(&mmap_mutex); | ||
48 | } | ||
49 | diff --git a/linux-user/mmap.c b/linux-user/mmap.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/linux-user/mmap.c | ||
52 | +++ b/linux-user/mmap.c | ||
53 | @@ -XXX,XX +XXX,XX @@ void mmap_lock(void) | ||
54 | |||
55 | void mmap_unlock(void) | ||
56 | { | ||
57 | + assert(mmap_lock_count > 0); | ||
58 | if (--mmap_lock_count == 0) { | ||
59 | pthread_mutex_unlock(&mmap_mutex); | ||
60 | } | ||
61 | -- | ||
62 | 2.34.1
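For readers unfamiliar with the for-loop/cleanup idiom behind the
WITH_MMAP_LOCK_GUARD macro added above, here is a minimal standalone sketch of
the same pattern applied to an ordinary pthread mutex (hypothetical names,
illustration only). The cleanup attribute releases the lock however the
guarded block is left, including via break or an early return:

    #include <pthread.h>

    static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER;

    static void demo_unlock_guard(int *unused)
    {
        pthread_mutex_unlock(&demo_lock);
    }

    #define WITH_DEMO_LOCK_GUARD() \
        for (int _demo_iter __attribute__((cleanup(demo_unlock_guard))) \
             = (pthread_mutex_lock(&demo_lock), 0); \
             _demo_iter == 0; _demo_iter = 1)

    int guarded_increment(int *counter)
    {
        WITH_DEMO_LOCK_GUARD() {
            if (*counter < 0) {
                return -1;      /* cleanup still unlocks demo_lock */
            }
            (*counter)++;
        }
        return 0;
    }

The loop body runs exactly once; the dummy induction variable exists only to
give the cleanup attribute a scope that ends with the guarded block.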
Deleted patch | |||
1 | In the initial commit, cdfac37be0d, the sense of the test is incorrect, | ||
2 | as the -1/0 return was confusing. In bef6f008b981, we mechanically | ||
3 | invert all callers while changing to false/true return, preserving the | ||
4 | incorrectness of the test. | ||
5 | 1 | ||
6 | Now that the return sense is sane, it's easy to see that if !write, | ||
7 | then the page is not modifiable (i.e. most likely read-only, with | ||
8 | PROT_NONE handled via SIGSEGV). | ||
9 | |||
10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | accel/tcg/ldst_atomicity.c.inc | 4 ++-- | ||
14 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
15 | |||
16 | diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/accel/tcg/ldst_atomicity.c.inc | ||
19 | +++ b/accel/tcg/ldst_atomicity.c.inc | ||
20 | @@ -XXX,XX +XXX,XX @@ static uint64_t load_atomic8_or_exit(CPUArchState *env, uintptr_t ra, void *pv) | ||
21 | * another process, because the fallback start_exclusive solution | ||
22 | * provides no protection across processes. | ||
23 | */ | ||
24 | - if (page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) { | ||
25 | + if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) { | ||
26 | uint64_t *p = __builtin_assume_aligned(pv, 8); | ||
27 | return *p; | ||
28 | } | ||
29 | @@ -XXX,XX +XXX,XX @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv) | ||
30 | * another process, because the fallback start_exclusive solution | ||
31 | * provides no protection across processes. | ||
32 | */ | ||
33 | - if (page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) { | ||
34 | + if (!page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) { | ||
35 | return *p; | ||
36 | } | ||
37 | #endif | ||
38 | -- | ||
39 | 2.34.1
Deleted patch | |||
1 | For user-only, the probe for page writability may race with another | ||
2 | thread's mprotect. Take the mmap_lock around the operation. This | ||
3 | is still faster than the start/end_exclusive fallback. | ||
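For context, the writability probe matters because a 16-byte atomic load is
emulated with a compare-and-swap, and a compare-and-swap needs write access.
A hedged sketch of that trick (assuming a host with a native 16-byte CAS,
e.g. x86-64 built with -mcx16; hypothetical helper, not the QEMU one):

    #include <stdbool.h>

    typedef __int128 Int128Demo;

    /*
     * Atomically read *p by compare-and-swapping it with itself.  On
     * failure the current contents land in 'expected'; on success the
     * value already equalled 'expected'.  Either way we return the old
     * value, but the page must be writable for the CAS to be legal.
     */
    static Int128Demo load16_via_cmpxchg(Int128Demo *p)
    {
        Int128Demo expected = 0;

        __atomic_compare_exchange_n(p, &expected, expected, false,
                                    __ATOMIC_RELAXED, __ATOMIC_RELAXED);
        return expected;
    }

Because the emulated load may store, the page_check_range() answer has to stay
valid until the access completes, which is what holding the mmap lock across
both the probe and the load provides.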
4 | 1 | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
7 | --- | ||
8 | accel/tcg/ldst_atomicity.c.inc | 32 ++++++++++++++++++-------------- | ||
9 | 1 file changed, 18 insertions(+), 14 deletions(-) | ||
10 | |||
11 | diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/accel/tcg/ldst_atomicity.c.inc | ||
14 | +++ b/accel/tcg/ldst_atomicity.c.inc | ||
15 | @@ -XXX,XX +XXX,XX @@ static uint64_t load_atomic8_or_exit(CPUArchState *env, uintptr_t ra, void *pv) | ||
16 | * another process, because the fallback start_exclusive solution | ||
17 | * provides no protection across processes. | ||
18 | */ | ||
19 | - if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) { | ||
20 | - uint64_t *p = __builtin_assume_aligned(pv, 8); | ||
21 | - return *p; | ||
22 | + WITH_MMAP_LOCK_GUARD() { | ||
23 | + if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) { | ||
24 | + uint64_t *p = __builtin_assume_aligned(pv, 8); | ||
25 | + return *p; | ||
26 | + } | ||
27 | } | ||
28 | #endif | ||
29 | |||
30 | @@ -XXX,XX +XXX,XX @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv) | ||
31 | return atomic16_read_ro(p); | ||
32 | } | ||
33 | |||
34 | -#ifdef CONFIG_USER_ONLY | ||
35 | /* | ||
36 | * We can only use cmpxchg to emulate a load if the page is writable. | ||
37 | * If the page is not writable, then assume the value is immutable | ||
38 | * and requires no locking. This ignores the case of MAP_SHARED with | ||
39 | * another process, because the fallback start_exclusive solution | ||
40 | * provides no protection across processes. | ||
41 | + * | ||
42 | + * In system mode all guest pages are writable. For user mode, | ||
43 | + * we must take mmap_lock so that the query remains valid until | ||
44 | + * the write is complete -- tests/tcg/multiarch/munmap-pthread.c | ||
45 | + * is an example that can race. | ||
46 | */ | ||
47 | - if (!page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) { | ||
48 | - return *p; | ||
49 | - } | ||
50 | + WITH_MMAP_LOCK_GUARD() { | ||
51 | +#ifdef CONFIG_USER_ONLY | ||
52 | + if (!page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) { | ||
53 | + return *p; | ||
54 | + } | ||
55 | #endif | ||
56 | - | ||
57 | - /* | ||
58 | - * In system mode all guest pages are writable, and for user-only | ||
59 | - * we have just checked writability. Try cmpxchg. | ||
60 | - */ | ||
61 | - if (HAVE_ATOMIC128_RW) { | ||
62 | - return atomic16_read_rw(p); | ||
63 | + if (HAVE_ATOMIC128_RW) { | ||
64 | + return atomic16_read_rw(p); | ||
65 | + } | ||
66 | } | ||
67 | |||
68 | /* Ultimate fallback: re-execute in serial context. */ | ||
69 | -- | ||
70 | 2.34.1
1 | From: Ilya Leoshkevich <iii@linux.ibm.com> | 1 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
2 | |||
3 | i386 and s390x implementations of op_add2 require an earlyclobber, | ||
4 | which is currently missing. This breaks VCKSM in s390x guests. E.g., on | ||
5 | x86_64 the following op: | ||
6 | |||
7 | add2_i32 tmp2,tmp3,tmp2,tmp3,tmp3,tmp2 dead: 0 2 3 4 5 pref=none,0xffff | ||
8 | |||
9 | is translated to: | ||
10 | |||
11 | addl %ebx, %r12d | ||
12 | adcl %r12d, %ebx | ||
13 | |||
14 | Introduce a new C_N1_O1_I4 constraint, and make sure that earlyclobber | ||
15 | of aliased outputs is honored. | ||
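A plain-C illustration of the hazard may help (hypothetical register
assignment, assuming %ebx held tmp2 and %r12d held tmp3 on entry): the first
instruction overwrites %r12d with the low result, so the second instruction
adds the low result where it should have added the original high input.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t tmp2 = 0x00000001, tmp3 = 0xffffffff;

        /* Intended result of the op: {hi:lo} = {tmp3:tmp2} + {tmp2:tmp3}. */
        uint64_t a = ((uint64_t)tmp3 << 32) | tmp2;
        uint64_t b = ((uint64_t)tmp2 << 32) | tmp3;
        uint64_t want = a + b;

        /* What "addl %ebx, %r12d; adcl %r12d, %ebx" computes: */
        uint32_t r12d = tmp3 + tmp2;          /* low result clobbers tmp3 */
        uint32_t carry = r12d < tmp2;
        uint32_t ebx = tmp2 + r12d + carry;   /* high half reads the clobber */
        uint64_t got = ((uint64_t)ebx << 32) | r12d;

        printf("want=%016llx got=%016llx\n",
               (unsigned long long)want, (unsigned long long)got);
        return 0;
    }

With the earlyclobber in place, the register allocator gives the first output
a fresh register, so both inputs of the carry-propagating half survive.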
16 | |||
17 | Cc: qemu-stable@nongnu.org | ||
18 | Fixes: 82790a870992 ("tcg: Add markup for output requires new register") | ||
19 | Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com> | ||
20 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
21 | Message-Id: <20230719221310.1968845-7-iii@linux.ibm.com> | ||
22 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
23 | --- | 3 | --- |
24 | tcg/i386/tcg-target-con-set.h | 5 ++++- | 4 | tcg/i386/tcg-target.c.inc | 198 +++++++++++++++++++------------------- |
25 | tcg/s390x/tcg-target-con-set.h | 8 +++++--- | 5 | 1 file changed, 98 insertions(+), 100 deletions(-) |
26 | tcg/tcg.c | 8 +++++++- | ||
27 | tcg/i386/tcg-target.c.inc | 2 +- | ||
28 | tcg/s390x/tcg-target.c.inc | 4 ++-- | ||
29 | 5 files changed, 19 insertions(+), 8 deletions(-) | ||
30 | 6 | ||
31 | diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/tcg/i386/tcg-target-con-set.h | ||
34 | +++ b/tcg/i386/tcg-target-con-set.h | ||
35 | @@ -XXX,XX +XXX,XX @@ | ||
36 | * | ||
37 | * C_N1_Im(...) defines a constraint set with 1 output and <m> inputs, | ||
38 | * except that the output must use a new register. | ||
39 | + * | ||
40 | + * C_Nn_Om_Ik(...) defines a constraint set with <n + m> outputs and <k> | ||
41 | + * inputs, except that the first <n> outputs must use new registers. | ||
42 | */ | ||
43 | C_O0_I1(r) | ||
44 | C_O0_I2(L, L) | ||
45 | @@ -XXX,XX +XXX,XX @@ C_O2_I1(r, r, L) | ||
46 | C_O2_I2(a, d, a, r) | ||
47 | C_O2_I2(r, r, L, L) | ||
48 | C_O2_I3(a, d, 0, 1, r) | ||
49 | -C_O2_I4(r, r, 0, 1, re, re) | ||
50 | +C_N1_O1_I4(r, r, 0, 1, re, re) | ||
51 | diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/tcg/s390x/tcg-target-con-set.h | ||
54 | +++ b/tcg/s390x/tcg-target-con-set.h | ||
55 | @@ -XXX,XX +XXX,XX @@ | ||
56 | * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs. | ||
57 | * Each operand should be a sequence of constraint letters as defined by | ||
58 | * tcg-target-con-str.h; the constraint combination is inclusive or. | ||
59 | + * | ||
60 | + * C_Nn_Om_Ik(...) defines a constraint set with <n + m> outputs and <k> | ||
61 | + * inputs, except that the first <n> outputs must use new registers. | ||
62 | */ | ||
63 | C_O0_I1(r) | ||
64 | C_O0_I2(r, r) | ||
65 | @@ -XXX,XX +XXX,XX @@ C_O2_I1(o, m, r) | ||
66 | C_O2_I2(o, m, 0, r) | ||
67 | C_O2_I2(o, m, r, r) | ||
68 | C_O2_I3(o, m, 0, 1, r) | ||
69 | -C_O2_I4(r, r, 0, 1, rA, r) | ||
70 | -C_O2_I4(r, r, 0, 1, ri, r) | ||
71 | -C_O2_I4(r, r, 0, 1, r, r) | ||
72 | +C_N1_O1_I4(r, r, 0, 1, ri, r) | ||
73 | +C_N1_O1_I4(r, r, 0, 1, rA, r) | ||
74 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/tcg/tcg.c | ||
77 | +++ b/tcg/tcg.c | ||
78 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1, | ||
79 | #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2), | ||
80 | #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3), | ||
81 | #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4), | ||
82 | +#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4), | ||
83 | |||
84 | typedef enum { | ||
85 | #include "tcg-target-con-set.h" | ||
86 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode); | ||
87 | #undef C_O2_I2 | ||
88 | #undef C_O2_I3 | ||
89 | #undef C_O2_I4 | ||
90 | +#undef C_N1_O1_I4 | ||
91 | |||
92 | /* Put all of the constraint sets into an array, indexed by the enum. */ | ||
93 | |||
94 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode); | ||
95 | #define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } }, | ||
96 | #define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } }, | ||
97 | #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } }, | ||
98 | +#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } }, | ||
99 | |||
100 | static const TCGTargetOpDef constraint_sets[] = { | ||
101 | #include "tcg-target-con-set.h" | ||
102 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef constraint_sets[] = { | ||
103 | #undef C_O2_I2 | ||
104 | #undef C_O2_I3 | ||
105 | #undef C_O2_I4 | ||
106 | +#undef C_N1_O1_I4 | ||
107 | |||
108 | /* Expand the enumerator to be returned from tcg_target_op_def(). */ | ||
109 | |||
110 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef constraint_sets[] = { | ||
111 | #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2) | ||
112 | #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3) | ||
113 | #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4) | ||
114 | +#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4) | ||
115 | |||
116 | #include "tcg-target.c.inc" | ||
117 | |||
118 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
119 | * dead after the instruction, we must allocate a new | ||
120 | * register and move it. | ||
121 | */ | ||
122 | - if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { | ||
123 | + if (temp_readonly(ts) || !IS_DEAD_ARG(i) | ||
124 | + || def->args_ct[arg_ct->alias_index].newreg) { | ||
125 | allocate_new_reg = true; | ||
126 | } else if (ts->val_type == TEMP_VAL_REG) { | ||
127 | /* | ||
128 | diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc | 7 | diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc |
129 | index XXXXXXX..XXXXXXX 100644 | 8 | index XXXXXXX..XXXXXXX 100644 |
130 | --- a/tcg/i386/tcg-target.c.inc | 9 | --- a/tcg/i386/tcg-target.c.inc |
131 | +++ b/tcg/i386/tcg-target.c.inc | 10 | +++ b/tcg/i386/tcg-target.c.inc |
132 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | 11 | @@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) |
133 | case INDEX_op_add2_i64: | 12 | # define ALL_VECTOR_REGS 0x00ff0000u |
134 | case INDEX_op_sub2_i32: | 13 | # define ALL_BYTEL_REGS 0x0000000fu |
135 | case INDEX_op_sub2_i64: | 14 | #endif |
136 | - return C_O2_I4(r, r, 0, 1, re, re); | 15 | -#ifdef CONFIG_SOFTMMU |
137 | + return C_N1_O1_I4(r, r, 0, 1, re, re); | 16 | -# define SOFTMMU_RESERVE_REGS ((1 << TCG_REG_L0) | (1 << TCG_REG_L1)) |
138 | 17 | -#else | |
139 | case INDEX_op_ctz_i32: | 18 | -# define SOFTMMU_RESERVE_REGS 0 |
140 | case INDEX_op_ctz_i64: | 19 | -#endif |
141 | diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc | 20 | +#define SOFTMMU_RESERVE_REGS \ |
142 | index XXXXXXX..XXXXXXX 100644 | 21 | + (tcg_use_softmmu ? (1 << TCG_REG_L0) | (1 << TCG_REG_L1) : 0) |
143 | --- a/tcg/s390x/tcg-target.c.inc | 22 | |
144 | +++ b/tcg/s390x/tcg-target.c.inc | 23 | /* For 64-bit, we always know that CMOV is available. */ |
145 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | 24 | #if TCG_TARGET_REG_BITS == 64 |
146 | 25 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) | |
147 | case INDEX_op_add2_i32: | 26 | return true; |
148 | case INDEX_op_sub2_i32: | 27 | } |
149 | - return C_O2_I4(r, r, 0, 1, ri, r); | 28 | |
150 | + return C_N1_O1_I4(r, r, 0, 1, ri, r); | 29 | -#ifndef CONFIG_SOFTMMU |
151 | 30 | +#ifdef CONFIG_USER_ONLY | |
152 | case INDEX_op_add2_i64: | 31 | static HostAddress x86_guest_base = { |
153 | case INDEX_op_sub2_i64: | 32 | .index = -1 |
154 | - return C_O2_I4(r, r, 0, 1, rA, r); | 33 | }; |
155 | + return C_N1_O1_I4(r, r, 0, 1, rA, r); | 34 | @@ -XXX,XX +XXX,XX @@ static inline int setup_guest_base_seg(void) |
156 | 35 | } | |
157 | case INDEX_op_st_vec: | 36 | return 0; |
158 | return C_O0_I2(v, r); | 37 | } |
38 | +#define setup_guest_base_seg setup_guest_base_seg | ||
39 | #elif defined(__x86_64__) && \ | ||
40 | (defined (__FreeBSD__) || defined (__FreeBSD_kernel__)) | ||
41 | # include <machine/sysarch.h> | ||
42 | @@ -XXX,XX +XXX,XX @@ static inline int setup_guest_base_seg(void) | ||
43 | } | ||
44 | return 0; | ||
45 | } | ||
46 | +#define setup_guest_base_seg setup_guest_base_seg | ||
47 | +#endif | ||
48 | #else | ||
49 | -static inline int setup_guest_base_seg(void) | ||
50 | -{ | ||
51 | - return 0; | ||
52 | -} | ||
53 | -#endif /* setup_guest_base_seg */ | ||
54 | -#endif /* !SOFTMMU */ | ||
55 | +# define x86_guest_base (*(HostAddress *)({ qemu_build_not_reached(); NULL; })) | ||
56 | +#endif /* CONFIG_USER_ONLY */ | ||
57 | +#ifndef setup_guest_base_seg | ||
58 | +# define setup_guest_base_seg() 0 | ||
59 | +#endif | ||
60 | |||
61 | #define MIN_TLB_MASK_TABLE_OFS INT_MIN | ||
62 | |||
63 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, | ||
64 | MemOp s_bits = opc & MO_SIZE; | ||
65 | unsigned a_mask; | ||
66 | |||
67 | -#ifdef CONFIG_SOFTMMU | ||
68 | - h->index = TCG_REG_L0; | ||
69 | - h->ofs = 0; | ||
70 | - h->seg = 0; | ||
71 | -#else | ||
72 | - *h = x86_guest_base; | ||
73 | -#endif | ||
74 | + if (tcg_use_softmmu) { | ||
75 | + h->index = TCG_REG_L0; | ||
76 | + h->ofs = 0; | ||
77 | + h->seg = 0; | ||
78 | + } else { | ||
79 | + *h = x86_guest_base; | ||
80 | + } | ||
81 | h->base = addrlo; | ||
82 | h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128); | ||
83 | a_mask = (1 << h->aa.align) - 1; | ||
84 | |||
85 | -#ifdef CONFIG_SOFTMMU | ||
86 | - int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read) | ||
87 | - : offsetof(CPUTLBEntry, addr_write); | ||
88 | - TCGType ttype = TCG_TYPE_I32; | ||
89 | - TCGType tlbtype = TCG_TYPE_I32; | ||
90 | - int trexw = 0, hrexw = 0, tlbrexw = 0; | ||
91 | - unsigned mem_index = get_mmuidx(oi); | ||
92 | - unsigned s_mask = (1 << s_bits) - 1; | ||
93 | - int fast_ofs = tlb_mask_table_ofs(s, mem_index); | ||
94 | - int tlb_mask; | ||
95 | + if (tcg_use_softmmu) { | ||
96 | + int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read) | ||
97 | + : offsetof(CPUTLBEntry, addr_write); | ||
98 | + TCGType ttype = TCG_TYPE_I32; | ||
99 | + TCGType tlbtype = TCG_TYPE_I32; | ||
100 | + int trexw = 0, hrexw = 0, tlbrexw = 0; | ||
101 | + unsigned mem_index = get_mmuidx(oi); | ||
102 | + unsigned s_mask = (1 << s_bits) - 1; | ||
103 | + int fast_ofs = tlb_mask_table_ofs(s, mem_index); | ||
104 | + int tlb_mask; | ||
105 | |||
106 | - ldst = new_ldst_label(s); | ||
107 | - ldst->is_ld = is_ld; | ||
108 | - ldst->oi = oi; | ||
109 | - ldst->addrlo_reg = addrlo; | ||
110 | - ldst->addrhi_reg = addrhi; | ||
111 | + ldst = new_ldst_label(s); | ||
112 | + ldst->is_ld = is_ld; | ||
113 | + ldst->oi = oi; | ||
114 | + ldst->addrlo_reg = addrlo; | ||
115 | + ldst->addrhi_reg = addrhi; | ||
116 | |||
117 | - if (TCG_TARGET_REG_BITS == 64) { | ||
118 | - ttype = s->addr_type; | ||
119 | - trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW); | ||
120 | - if (TCG_TYPE_PTR == TCG_TYPE_I64) { | ||
121 | - hrexw = P_REXW; | ||
122 | - if (s->page_bits + s->tlb_dyn_max_bits > 32) { | ||
123 | - tlbtype = TCG_TYPE_I64; | ||
124 | - tlbrexw = P_REXW; | ||
125 | + if (TCG_TARGET_REG_BITS == 64) { | ||
126 | + ttype = s->addr_type; | ||
127 | + trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW); | ||
128 | + if (TCG_TYPE_PTR == TCG_TYPE_I64) { | ||
129 | + hrexw = P_REXW; | ||
130 | + if (s->page_bits + s->tlb_dyn_max_bits > 32) { | ||
131 | + tlbtype = TCG_TYPE_I64; | ||
132 | + tlbrexw = P_REXW; | ||
133 | + } | ||
134 | } | ||
135 | } | ||
136 | - } | ||
137 | |||
138 | - tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo); | ||
139 | - tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0, | ||
140 | - s->page_bits - CPU_TLB_ENTRY_BITS); | ||
141 | + tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo); | ||
142 | + tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0, | ||
143 | + s->page_bits - CPU_TLB_ENTRY_BITS); | ||
144 | |||
145 | - tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0, | ||
146 | - fast_ofs + offsetof(CPUTLBDescFast, mask)); | ||
147 | + tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0, | ||
148 | + fast_ofs + offsetof(CPUTLBDescFast, mask)); | ||
149 | |||
150 | - tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0, | ||
151 | - fast_ofs + offsetof(CPUTLBDescFast, table)); | ||
152 | + tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0, | ||
153 | + fast_ofs + offsetof(CPUTLBDescFast, table)); | ||
154 | |||
155 | - /* | ||
156 | - * If the required alignment is at least as large as the access, simply | ||
157 | - * copy the address and mask. For lesser alignments, check that we don't | ||
158 | - * cross pages for the complete access. | ||
159 | - */ | ||
160 | - if (a_mask >= s_mask) { | ||
161 | - tcg_out_mov(s, ttype, TCG_REG_L1, addrlo); | ||
162 | - } else { | ||
163 | - tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1, | ||
164 | - addrlo, s_mask - a_mask); | ||
165 | - } | ||
166 | - tlb_mask = s->page_mask | a_mask; | ||
167 | - tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0); | ||
168 | + /* | ||
169 | + * If the required alignment is at least as large as the access, | ||
170 | + * simply copy the address and mask. For lesser alignments, | ||
171 | + * check that we don't cross pages for the complete access. | ||
172 | + */ | ||
173 | + if (a_mask >= s_mask) { | ||
174 | + tcg_out_mov(s, ttype, TCG_REG_L1, addrlo); | ||
175 | + } else { | ||
176 | + tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1, | ||
177 | + addrlo, s_mask - a_mask); | ||
178 | + } | ||
179 | + tlb_mask = s->page_mask | a_mask; | ||
180 | + tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0); | ||
181 | |||
182 | - /* cmp 0(TCG_REG_L0), TCG_REG_L1 */ | ||
183 | - tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, | ||
184 | - TCG_REG_L1, TCG_REG_L0, cmp_ofs); | ||
185 | - | ||
186 | - /* jne slow_path */ | ||
187 | - tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); | ||
188 | - ldst->label_ptr[0] = s->code_ptr; | ||
189 | - s->code_ptr += 4; | ||
190 | - | ||
191 | - if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I64) { | ||
192 | - /* cmp 4(TCG_REG_L0), addrhi */ | ||
193 | - tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, cmp_ofs + 4); | ||
194 | + /* cmp 0(TCG_REG_L0), TCG_REG_L1 */ | ||
195 | + tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, | ||
196 | + TCG_REG_L1, TCG_REG_L0, cmp_ofs); | ||
197 | |||
198 | /* jne slow_path */ | ||
199 | tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); | ||
200 | - ldst->label_ptr[1] = s->code_ptr; | ||
201 | + ldst->label_ptr[0] = s->code_ptr; | ||
202 | s->code_ptr += 4; | ||
203 | - } | ||
204 | |||
205 | - /* TLB Hit. */ | ||
206 | - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0, | ||
207 | - offsetof(CPUTLBEntry, addend)); | ||
208 | -#else | ||
209 | - if (a_mask) { | ||
210 | + if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I64) { | ||
211 | + /* cmp 4(TCG_REG_L0), addrhi */ | ||
212 | + tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, | ||
213 | + TCG_REG_L0, cmp_ofs + 4); | ||
214 | + | ||
215 | + /* jne slow_path */ | ||
216 | + tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0); | ||
217 | + ldst->label_ptr[1] = s->code_ptr; | ||
218 | + s->code_ptr += 4; | ||
219 | + } | ||
220 | + | ||
221 | + /* TLB Hit. */ | ||
222 | + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0, | ||
223 | + offsetof(CPUTLBEntry, addend)); | ||
224 | + } else if (a_mask) { | ||
225 | ldst = new_ldst_label(s); | ||
226 | |||
227 | ldst->is_ld = is_ld; | ||
228 | @@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, | ||
229 | ldst->label_ptr[0] = s->code_ptr; | ||
230 | s->code_ptr += 4; | ||
231 | } | ||
232 | -#endif | ||
233 | |||
234 | return ldst; | ||
235 | } | ||
236 | @@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s) | ||
237 | tcg_out_push(s, tcg_target_callee_save_regs[i]); | ||
238 | } | ||
239 | |||
240 | -#if TCG_TARGET_REG_BITS == 32 | ||
241 | - tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, | ||
242 | - (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4); | ||
243 | - tcg_out_addi(s, TCG_REG_ESP, -stack_addend); | ||
244 | - /* jmp *tb. */ | ||
245 | - tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP, | ||
246 | - (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4 | ||
247 | - + stack_addend); | ||
248 | -#else | ||
249 | -# if !defined(CONFIG_SOFTMMU) | ||
250 | - if (guest_base) { | ||
251 | + if (!tcg_use_softmmu && guest_base) { | ||
252 | int seg = setup_guest_base_seg(); | ||
253 | if (seg != 0) { | ||
254 | x86_guest_base.seg = seg; | ||
255 | } else if (guest_base == (int32_t)guest_base) { | ||
256 | x86_guest_base.ofs = guest_base; | ||
257 | } else { | ||
258 | + assert(TCG_TARGET_REG_BITS == 64); | ||
259 | /* Choose R12 because, as a base, it requires a SIB byte. */ | ||
260 | x86_guest_base.index = TCG_REG_R12; | ||
261 | tcg_out_movi(s, TCG_TYPE_PTR, x86_guest_base.index, guest_base); | ||
262 | tcg_regset_set_reg(s->reserved_regs, x86_guest_base.index); | ||
263 | } | ||
264 | } | ||
265 | -# endif | ||
266 | - tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); | ||
267 | - tcg_out_addi(s, TCG_REG_ESP, -stack_addend); | ||
268 | - /* jmp *tb. */ | ||
269 | - tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]); | ||
270 | -#endif | ||
271 | + | ||
272 | + if (TCG_TARGET_REG_BITS == 32) { | ||
273 | + tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, | ||
274 | + (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4); | ||
275 | + tcg_out_addi(s, TCG_REG_ESP, -stack_addend); | ||
276 | + /* jmp *tb. */ | ||
277 | + tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP, | ||
278 | + (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4 | ||
279 | + + stack_addend); | ||
280 | + } else { | ||
281 | + tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); | ||
282 | + tcg_out_addi(s, TCG_REG_ESP, -stack_addend); | ||
283 | + /* jmp *tb. */ | ||
284 | + tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]); | ||
285 | + } | ||
286 | |||
287 | /* | ||
288 | * Return path for goto_ptr. Set return value to 0, a-la exit_tb, | ||
159 | -- | 289 | -- |
160 | 2.34.1 | 290 | 2.34.1 |
291 | |||
Deleted patch | |||
1 | From: Anton Johansson <anjo@rev.ng> | ||
2 | 1 | ||
3 | In replacing target_ulong with vaddr and TARGET_FMT_lx with VADDR_PRIx, | ||
4 | the zero-padding of TARGET_FMT_lx got lost. Readd 16-wide zero-padding | ||
5 | for logging consistency. | ||
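A tiny sketch of the formatting difference being restored (illustrative
values only): without the width the hex addresses print at varying lengths,
with "%016" they stay column-aligned in the log.

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t addr = 0x1000;

        printf("unpadded: %" PRIx64 "\n", addr);     /* "1000" */
        printf("padded:   %016" PRIx64 "\n", addr);  /* "0000000000001000" */
        return 0;
    }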
6 | |||
7 | Suggested-by: Peter Maydell <peter.maydell@linaro.org> | ||
8 | Signed-off-by: Anton Johansson <anjo@rev.ng> | ||
9 | Message-Id: <20230713120746.26897-1-anjo@rev.ng> | ||
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | accel/tcg/cputlb.c | 20 ++++++++++---------- | ||
14 | 1 file changed, 10 insertions(+), 10 deletions(-) | ||
15 | |||
16 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/accel/tcg/cputlb.c | ||
19 | +++ b/accel/tcg/cputlb.c | ||
20 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_locked(CPUArchState *env, int midx, vaddr page) | ||
21 | |||
22 | /* Check if we need to flush due to large pages. */ | ||
23 | if ((page & lp_mask) == lp_addr) { | ||
24 | - tlb_debug("forcing full flush midx %d (%" | ||
25 | - VADDR_PRIx "/%" VADDR_PRIx ")\n", | ||
26 | + tlb_debug("forcing full flush midx %d (%016" | ||
27 | + VADDR_PRIx "/%016" VADDR_PRIx ")\n", | ||
28 | midx, lp_addr, lp_mask); | ||
29 | tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); | ||
30 | } else { | ||
31 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu, | ||
32 | |||
33 | assert_cpu_is_self(cpu); | ||
34 | |||
35 | - tlb_debug("page addr: %" VADDR_PRIx " mmu_map:0x%x\n", addr, idxmap); | ||
36 | + tlb_debug("page addr: %016" VADDR_PRIx " mmu_map:0x%x\n", addr, idxmap); | ||
37 | |||
38 | qemu_spin_lock(&env_tlb(env)->c.lock); | ||
39 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
40 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu, | ||
41 | |||
42 | void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr, uint16_t idxmap) | ||
43 | { | ||
44 | - tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap); | ||
45 | + tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap); | ||
46 | |||
47 | /* This should already be page aligned */ | ||
48 | addr &= TARGET_PAGE_MASK; | ||
49 | @@ -XXX,XX +XXX,XX @@ void tlb_flush_page(CPUState *cpu, vaddr addr) | ||
50 | void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, vaddr addr, | ||
51 | uint16_t idxmap) | ||
52 | { | ||
53 | - tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap); | ||
54 | + tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap); | ||
55 | |||
56 | /* This should already be page aligned */ | ||
57 | addr &= TARGET_PAGE_MASK; | ||
58 | @@ -XXX,XX +XXX,XX @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu, | ||
59 | vaddr addr, | ||
60 | uint16_t idxmap) | ||
61 | { | ||
62 | - tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap); | ||
63 | + tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap); | ||
64 | |||
65 | /* This should already be page aligned */ | ||
66 | addr &= TARGET_PAGE_MASK; | ||
67 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_locked(CPUArchState *env, int midx, | ||
68 | */ | ||
69 | if (mask < f->mask || len > f->mask) { | ||
70 | tlb_debug("forcing full flush midx %d (" | ||
71 | - "%" VADDR_PRIx "/%" VADDR_PRIx "+%" VADDR_PRIx ")\n", | ||
72 | + "%016" VADDR_PRIx "/%016" VADDR_PRIx "+%016" VADDR_PRIx ")\n", | ||
73 | midx, addr, mask, len); | ||
74 | tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); | ||
75 | return; | ||
76 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_locked(CPUArchState *env, int midx, | ||
77 | */ | ||
78 | if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) { | ||
79 | tlb_debug("forcing full flush midx %d (" | ||
80 | - "%" VADDR_PRIx "/%" VADDR_PRIx ")\n", | ||
81 | + "%016" VADDR_PRIx "/%016" VADDR_PRIx ")\n", | ||
82 | midx, d->large_page_addr, d->large_page_mask); | ||
83 | tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); | ||
84 | return; | ||
85 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu, | ||
86 | |||
87 | assert_cpu_is_self(cpu); | ||
88 | |||
89 | - tlb_debug("range: %" VADDR_PRIx "/%u+%" VADDR_PRIx " mmu_map:0x%x\n", | ||
90 | + tlb_debug("range: %016" VADDR_PRIx "/%u+%016" VADDR_PRIx " mmu_map:0x%x\n", | ||
91 | d.addr, d.bits, d.len, d.idxmap); | ||
92 | |||
93 | qemu_spin_lock(&env_tlb(env)->c.lock); | ||
94 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, | ||
95 | &xlat, &sz, full->attrs, &prot); | ||
96 | assert(sz >= TARGET_PAGE_SIZE); | ||
97 | |||
98 | - tlb_debug("vaddr=%" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx | ||
99 | + tlb_debug("vaddr=%016" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx | ||
100 | " prot=%x idx=%d\n", | ||
101 | addr, full->phys_addr, prot, mmu_idx); | ||
102 | |||
103 | -- | ||
104 | 2.34.1
Deleted patch | |||
1 | From: Luca Bonissi <qemu@bonslack.org> | ||
2 | 1 | ||
3 | These should match 'start' as target_ulong, not target_long. | ||
4 | |||
5 | On 32bit targets, the parameter was sign-extended to uint64_t, | ||
6 | so only the first mmap within the upper 2GB memory can succeed. | ||
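A short, self-contained sketch of the sign-extension pitfall (not QEMU code):
an address in the upper 2GB widens very differently depending on whether the
32-bit source type is signed or unsigned.

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t last = 0x90000000u;              /* an address above 2GB */

        uint64_t as_unsigned = (uint32_t)last;    /* 0x0000000090000000 */
        uint64_t as_signed = (int32_t)last;       /* 0xffffffff90000000 */

        printf("target_ulong-like: %016" PRIx64 "\n", as_unsigned);
        printf("target_long-like:  %016" PRIx64 "\n", as_signed);
        return 0;
    }

With the signed widening the interval lookup is asked about a nonsensical
range, which matches the symptom described above.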
7 | |||
8 | Signed-off-by: Luca Bonissi <qemu@bonslack.org> | ||
9 | Message-Id: <327460e2-0ebd-9edb-426b-1df80d16c32a@bonslack.org> | ||
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | accel/tcg/user-exec.c | 4 ++-- | ||
14 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
15 | |||
16 | diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/accel/tcg/user-exec.c | ||
19 | +++ b/accel/tcg/user-exec.c | ||
20 | @@ -XXX,XX +XXX,XX @@ typedef struct PageFlagsNode { | ||
21 | |||
22 | static IntervalTreeRoot pageflags_root; | ||
23 | |||
24 | -static PageFlagsNode *pageflags_find(target_ulong start, target_long last) | ||
25 | +static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last) | ||
26 | { | ||
27 | IntervalTreeNode *n; | ||
28 | |||
29 | @@ -XXX,XX +XXX,XX @@ static PageFlagsNode *pageflags_find(target_ulong start, target_long last) | ||
30 | } | ||
31 | |||
32 | static PageFlagsNode *pageflags_next(PageFlagsNode *p, target_ulong start, | ||
33 | - target_long last) | ||
34 | + target_ulong last) | ||
35 | { | ||
36 | IntervalTreeNode *n; | ||
37 | |||
38 | -- | ||
39 | 2.34.1