1 | The following changes since commit d1181d29370a4318a9f11ea92065bea6bb159f83: | 1 | Version 2 drops the bsd cleanup and includes a minor improvement |
---|---|---|---|
2 | to the dump of the constant pool. | ||
2 | 3 | ||
3 | Merge tag 'pull-nbd-2023-07-19' of https://repo.or.cz/qemu/ericb into staging (2023-07-20 09:54:07 +0100) | 4 | |
5 | r~ | ||
6 | |||
7 | |||
8 | The following changes since commit 2d3fc4e2b069494b1e9e2e4a1e3de24cbc036426: | ||
9 | |||
10 | Merge remote-tracking branch 'remotes/armbru/tags/pull-misc-2021-05-12' into staging (2021-05-13 20:13:24 +0100) | ||
4 | 11 | ||
5 | are available in the Git repository at: | 12 | are available in the Git repository at: |
6 | 13 | ||
7 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230724 | 14 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20210516 |
8 | 15 | ||
9 | for you to fetch changes up to 32b120394c578bc824f1db4835b3bffbeca88fae: | 16 | for you to fetch changes up to 6c6a4a76eea900112c343ba4f9c5737e298feddf: |
10 | 17 | ||
11 | accel/tcg: Fix type of 'last' for pageflags_{find,next} (2023-07-24 09:48:49 +0100) | 18 | accel/tcg: Align data dumped at end of TB (2021-05-16 09:05:14 -0500) |
12 | 19 | ||
13 | ---------------------------------------------------------------- | 20 | ---------------------------------------------------------------- |
14 | accel/tcg: Zero-pad vaddr in tlb debug output | 21 | Minor MAINTAINERS update. |
15 | accel/tcg: Fix type of 'last' for pageflags_{find,next} | 22 | Tweak to includes. |
16 | accel/tcg: Fix sense of read-only probes in ldst_atomicity | 23 | Add tcg_constant_tl. |
17 | accel/tcg: Take mmap_lock in load_atomic*_or_exit | 24 | Improve constant pool dump. |
18 | tcg: Add earlyclobber to op_add2 for x86 and s390x | ||
19 | tcg/ppc: Fix race in goto_tb implementation | ||
20 | 25 | ||
21 | ---------------------------------------------------------------- | 26 | ---------------------------------------------------------------- |
22 | Anton Johansson (1): | 27 | Matheus Ferst (1): |
23 | accel/tcg: Zero-pad vaddr in tlb_debug output | 28 | tcg: Add tcg_constant_tl |
24 | 29 | ||
25 | Ilya Leoshkevich (1): | 30 | Philippe Mathieu-Daudé (3): |
26 | tcg/{i386, s390x}: Add earlyclobber to the op_add2's first output | 31 | MAINTAINERS: Add include/exec/gen-icount.h to 'Main Loop' section |
32 | exec/gen-icount.h: Add missing "exec/exec-all.h" include | ||
33 | accel/tcg: Align data dumped at end of TB | ||
27 | 34 | ||
28 | Jordan Niethe (1): | 35 | include/exec/gen-icount.h | 1 + |
29 | tcg/ppc: Fix race in goto_tb implementation | 36 | include/tcg/tcg-op.h | 2 ++ |
37 | accel/tcg/translate-all.c | 11 +++++++++-- | ||
38 | MAINTAINERS | 1 + | ||
39 | 4 files changed, 13 insertions(+), 2 deletions(-) | ||
30 | 40 | ||
31 | Luca Bonissi (1): | ||
32 | accel/tcg: Fix type of 'last' for pageflags_{find,next} | ||
33 | |||
34 | Richard Henderson (3): | ||
35 | include/exec: Add WITH_MMAP_LOCK_GUARD | ||
36 | accel/tcg: Fix sense of read-only probes in ldst_atomicity | ||
37 | accel/tcg: Take mmap_lock in load_atomic*_or_exit | ||
38 | |||
39 | include/exec/exec-all.h | 10 ++++++++++ | ||
40 | tcg/i386/tcg-target-con-set.h | 5 ++++- | ||
41 | tcg/s390x/tcg-target-con-set.h | 8 +++++--- | ||
42 | accel/tcg/cputlb.c | 20 ++++++++++---------- | ||
43 | accel/tcg/user-exec.c | 4 ++-- | ||
44 | bsd-user/mmap.c | 1 + | ||
45 | linux-user/mmap.c | 1 + | ||
46 | tcg/tcg.c | 8 +++++++- | ||
47 | accel/tcg/ldst_atomicity.c.inc | 32 ++++++++++++++++++-------------- | ||
48 | tcg/i386/tcg-target.c.inc | 2 +- | ||
49 | tcg/ppc/tcg-target.c.inc | 9 +++++---- | ||
50 | tcg/s390x/tcg-target.c.inc | 4 ++-- | ||
51 | 12 files changed, 66 insertions(+), 38 deletions(-) | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Jordan Niethe <jniethe5@gmail.com> | ||
2 | 1 | ||
3 | Commit 20b6643324 ("tcg/ppc: Reorg goto_tb implementation") modified | ||
4 | goto_tb to ensure only a single instruction was patched to prevent | ||
5 | incorrect behavior if a thread was in the middle of multiple | ||
6 | instructions when they were replaced. However this introduced a race | ||
7 | between loading the jmp target into TCG_REG_TB and patching and | ||
8 | executing the direct branch. | ||
9 | |||
10 | The relevant part of the goto_tb implementation: | ||
11 | |||
12 | ld TCG_REG_TB, TARGET_ADDR_LOCATION(TCG_REG_TB) | ||
13 | patch_location: | ||
14 | mtctr TCG_REG_TB | ||
15 | bctr | ||
16 | |||
17 | tb_target_set_jmp_target() will replace 'patch_location' with a direct | ||
18 | branch if the target is in range. The direct branch now relies on | ||
19 | TCG_REG_TB being set up correctly by the ld. Prior to this commit | ||
20 | multiple instructions were patched in for the direct branch case; these | ||
21 | instructions would initialize TCG_REG_TB to the same value as the branch | ||
22 | target. | ||
23 | |||
24 | Imagine the following sequence: | ||
25 | |||
26 | 1) Thread A is executing the goto_tb sequence and loads the jmp | ||
27 | target into TCG_REG_TB. | ||
28 | |||
29 | 2) Thread B updates the jmp target address and calls | ||
30 | tb_target_set_jmp_target(). This patches a new direct branch into the | ||
31 | goto_tb sequence. | ||
32 | |||
33 | 3) Thread A executes the newly patched direct branch. The value in | ||
34 | TCG_REG_TB still contains the old jmp target. | ||
35 | |||
36 | TCG_REG_TB MUST contain the translation block's tc.ptr. Execution will | ||
37 | eventually crash after performing memory accesses generated from a | ||
38 | faulty value in TCG_REG_TB. | ||
39 | |||
40 | This presents as segfaults or illegal instruction exceptions. | ||
41 | |||
42 | Do not revert commit 20b6643324 as it did fix a different race | ||
43 | condition. Instead remove the direct branch optimization and always use | ||
44 | indirect branches. | ||
45 | |||
46 | The direct branch optimization can be re-added later with a race free | ||
47 | sequence. | ||
48 | |||
49 | Fixes: 20b6643324 ("tcg/ppc: Reorg goto_tb implementation") | ||
50 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1726 | ||
51 | Reported-by: Anushree Mathur <anushree.mathur@linux.vnet.ibm.com> | ||
52 | Tested-by: Anushree Mathur <anushree.mathur@linux.vnet.ibm.com> | ||
53 | Tested-by: Michael Tokarev <mjt@tls.msk.ru> | ||
54 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
55 | Co-developed-by: Benjamin Gray <bgray@linux.ibm.com> | ||
56 | Signed-off-by: Jordan Niethe <jniethe5@gmail.com> | ||
57 | Signed-off-by: Benjamin Gray <bgray@linux.ibm.com> | ||
58 | Message-Id: <20230717093001.13167-1-jniethe5@gmail.com> | ||
59 | --- | ||
60 | tcg/ppc/tcg-target.c.inc | 9 +++++---- | ||
61 | 1 file changed, 5 insertions(+), 4 deletions(-) | ||
62 | |||
63 | diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc | ||
64 | index XXXXXXX..XXXXXXX 100644 | ||
65 | --- a/tcg/ppc/tcg-target.c.inc | ||
66 | +++ b/tcg/ppc/tcg-target.c.inc | ||
67 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_tb(TCGContext *s, int which) | ||
68 | ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr); | ||
69 | tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset); | ||
70 | |||
71 | - /* Direct branch will be patched by tb_target_set_jmp_target. */ | ||
72 | + /* TODO: Use direct branches when possible. */ | ||
73 | set_jmp_insn_offset(s, which); | ||
74 | tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR); | ||
75 | |||
76 | - /* When branch is out of range, fall through to indirect. */ | ||
77 | tcg_out32(s, BCCTR | BO_ALWAYS); | ||
78 | |||
79 | /* For the unlinked case, need to reset TCG_REG_TB. */ | ||
80 | @@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, | ||
81 | intptr_t diff = addr - jmp_rx; | ||
82 | tcg_insn_unit insn; | ||
83 | |||
84 | + if (USE_REG_TB) { | ||
85 | + return; | ||
86 | + } | ||
87 | + | ||
88 | if (in_range_b(diff)) { | ||
89 | insn = B | (diff & 0x3fffffc); | ||
90 | - } else if (USE_REG_TB) { | ||
91 | - insn = MTSPR | RS(TCG_REG_TB) | CTR; | ||
92 | } else { | ||
93 | insn = NOP; | ||
94 | } | ||
95 | -- | ||
96 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
2 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
3 | --- | ||
4 | include/exec/exec-all.h | 10 ++++++++++ | ||
5 | bsd-user/mmap.c | 1 + | ||
6 | linux-user/mmap.c | 1 + | ||
7 | 3 files changed, 12 insertions(+) | ||
8 | 1 | ||
9 | diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/include/exec/exec-all.h | ||
12 | +++ b/include/exec/exec-all.h | ||
13 | @@ -XXX,XX +XXX,XX @@ void TSA_NO_TSA mmap_lock(void); | ||
14 | void TSA_NO_TSA mmap_unlock(void); | ||
15 | bool have_mmap_lock(void); | ||
16 | |||
17 | +static inline void mmap_unlock_guard(void *unused) | ||
18 | +{ | ||
19 | + mmap_unlock(); | ||
20 | +} | ||
21 | + | ||
22 | +#define WITH_MMAP_LOCK_GUARD() \ | ||
23 | + for (int _mmap_lock_iter __attribute__((cleanup(mmap_unlock_guard))) \ | ||
24 | + = (mmap_lock(), 0); _mmap_lock_iter == 0; _mmap_lock_iter = 1) | ||
25 | + | ||
26 | /** | ||
27 | * adjust_signal_pc: | ||
28 | * @pc: raw pc from the host signal ucontext_t. | ||
29 | @@ -XXX,XX +XXX,XX @@ G_NORETURN void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr, | ||
30 | #else | ||
31 | static inline void mmap_lock(void) {} | ||
32 | static inline void mmap_unlock(void) {} | ||
33 | +#define WITH_MMAP_LOCK_GUARD() | ||
34 | |||
35 | void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length); | ||
36 | void tlb_set_dirty(CPUState *cpu, vaddr addr); | ||
37 | diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/bsd-user/mmap.c | ||
40 | +++ b/bsd-user/mmap.c | ||
41 | @@ -XXX,XX +XXX,XX @@ void mmap_lock(void) | ||
42 | |||
43 | void mmap_unlock(void) | ||
44 | { | ||
45 | + assert(mmap_lock_count > 0); | ||
46 | if (--mmap_lock_count == 0) { | ||
47 | pthread_mutex_unlock(&mmap_mutex); | ||
48 | } | ||
49 | diff --git a/linux-user/mmap.c b/linux-user/mmap.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/linux-user/mmap.c | ||
52 | +++ b/linux-user/mmap.c | ||
53 | @@ -XXX,XX +XXX,XX @@ void mmap_lock(void) | ||
54 | |||
55 | void mmap_unlock(void) | ||
56 | { | ||
57 | + assert(mmap_lock_count > 0); | ||
58 | if (--mmap_lock_count == 0) { | ||
59 | pthread_mutex_unlock(&mmap_mutex); | ||
60 | } | ||
61 | -- | ||
62 | 2.34.1 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | In the initial commit, cdfac37be0d, the sense of the test is incorrect, | ||
2 | as the -1/0 return was confusing. In bef6f008b981, we mechanically | ||
3 | invert all callers while changing to false/true return, preserving the | ||
4 | incorrectness of the test. | ||
5 | 1 | ||
6 | Now that the return sense is sane, it's easy to see that if !write, | ||
7 | then the page is not modifiable (i.e. most likely read-only, with | ||
8 | PROT_NONE handled via SIGSEGV). | ||
9 | |||
10 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | ||
13 | accel/tcg/ldst_atomicity.c.inc | 4 ++-- | ||
14 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
15 | |||
16 | diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/accel/tcg/ldst_atomicity.c.inc | ||
19 | +++ b/accel/tcg/ldst_atomicity.c.inc | ||
20 | @@ -XXX,XX +XXX,XX @@ static uint64_t load_atomic8_or_exit(CPUArchState *env, uintptr_t ra, void *pv) | ||
21 | * another process, because the fallback start_exclusive solution | ||
22 | * provides no protection across processes. | ||
23 | */ | ||
24 | - if (page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) { | ||
25 | + if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) { | ||
26 | uint64_t *p = __builtin_assume_aligned(pv, 8); | ||
27 | return *p; | ||
28 | } | ||
29 | @@ -XXX,XX +XXX,XX @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv) | ||
30 | * another process, because the fallback start_exclusive solution | ||
31 | * provides no protection across processes. | ||
32 | */ | ||
33 | - if (page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) { | ||
34 | + if (!page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) { | ||
35 | return *p; | ||
36 | } | ||
37 | #endif | ||
38 | -- | ||
39 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Luca Bonissi <qemu@bonslack.org> | 1 | From: Philippe Mathieu-Daudé <f4bug@amsat.org> |
---|---|---|---|
2 | 2 | ||
3 | These should match 'start' as target_ulong, not target_long. | 3 | As the 'Main Loop' section covers softmmu/icount.c, |
4 | add "exec/gen-icount.h" there too. | ||
4 | 5 | ||
5 | On 32bit targets, the parameter was sign-extended to uint64_t, | 6 | Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org> |
6 | so only the first mmap within the upper 2GB memory can succeed. | 7 | Message-Id: <20210422064128.2318616-2-f4bug@amsat.org> |
7 | |||
8 | Signed-off-by: Luca Bonissi <qemu@bonslack.org> | ||
9 | Message-Id: <327460e2-0ebd-9edb-426b-1df80d16c32a@bonslack.org> | ||
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 8 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
12 | --- | 9 | --- |
13 | accel/tcg/user-exec.c | 4 ++-- | 10 | MAINTAINERS | 1 + |
14 | 1 file changed, 2 insertions(+), 2 deletions(-) | 11 | 1 file changed, 1 insertion(+) |
15 | 12 | ||
16 | diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c | 13 | diff --git a/MAINTAINERS b/MAINTAINERS |
17 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/accel/tcg/user-exec.c | 15 | --- a/MAINTAINERS |
19 | +++ b/accel/tcg/user-exec.c | 16 | +++ b/MAINTAINERS |
20 | @@ -XXX,XX +XXX,XX @@ typedef struct PageFlagsNode { | 17 | @@ -XXX,XX +XXX,XX @@ F: ui/cocoa.m |
21 | 18 | Main loop | |
22 | static IntervalTreeRoot pageflags_root; | 19 | M: Paolo Bonzini <pbonzini@redhat.com> |
23 | 20 | S: Maintained | |
24 | -static PageFlagsNode *pageflags_find(target_ulong start, target_long last) | 21 | +F: include/exec/gen-icount.h |
25 | +static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last) | 22 | F: include/qemu/main-loop.h |
26 | { | 23 | F: include/sysemu/runstate.h |
27 | IntervalTreeNode *n; | 24 | F: include/sysemu/runstate-action.h |
28 | |||
29 | @@ -XXX,XX +XXX,XX @@ static PageFlagsNode *pageflags_find(target_ulong start, target_long last) | ||
30 | } | ||
31 | |||
32 | static PageFlagsNode *pageflags_next(PageFlagsNode *p, target_ulong start, | ||
33 | - target_long last) | ||
34 | + target_ulong last) | ||
35 | { | ||
36 | IntervalTreeNode *n; | ||
37 | |||
38 | -- | 25 | -- |
39 | 2.34.1 | 26 | 2.25.1 |
27 | |||
28 | diff view generated by jsdifflib |
1 | From: Anton Johansson <anjo@rev.ng> | 1 | From: Philippe Mathieu-Daudé <f4bug@amsat.org> |
---|---|---|---|
2 | 2 | ||
3 | In replacing target_ulong with vaddr and TARGET_FMT_lx with VADDR_PRIx, | 3 | When including "exec/gen-icount.h" we get: |
4 | the zero-padding of TARGET_FMT_lx got lost. Readd 16-wide zero-padding | ||
5 | for logging consistency. | ||
6 | 4 | ||
7 | Suggested-by: Peter Maydell <peter.maydell@linaro.org> | 5 | include/exec/gen-icount.h: In function ‘gen_tb_start’: |
8 | Signed-off-by: Anton Johansson <anjo@rev.ng> | 6 | include/exec/gen-icount.h:40:9: error: implicit declaration of function ‘tb_cflags’ [-Werror=implicit-function-declaration] |
9 | Message-Id: <20230713120746.26897-1-anjo@rev.ng> | 7 | 40 | if (tb_cflags(tb) & CF_USE_ICOUNT) { |
10 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 8 | | ^~~~~~~~~ |
9 | include/exec/gen-icount.h:40:9: error: nested extern declaration of ‘tb_cflags’ [-Werror=nested-externs] | ||
10 | include/exec/gen-icount.h:40:25: error: ‘CF_USE_ICOUNT’ undeclared (first use in this function); did you mean ‘CPU_COUNT’? | ||
11 | 40 | if (tb_cflags(tb) & CF_USE_ICOUNT) { | ||
12 | | ^~~~~~~~~~~~~ | ||
13 | | CPU_COUNT | ||
14 | |||
15 | Since tb_cflags() is declared in "exec/exec-all.h", include this | ||
16 | header in "exec/gen-icount.h". | ||
17 | |||
18 | Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
19 | Message-Id: <20210422064128.2318616-3-f4bug@amsat.org> | ||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 20 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
12 | --- | 21 | --- |
13 | accel/tcg/cputlb.c | 20 ++++++++++---------- | 22 | include/exec/gen-icount.h | 1 + |
14 | 1 file changed, 10 insertions(+), 10 deletions(-) | 23 | 1 file changed, 1 insertion(+) |
15 | 24 | ||
16 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | 25 | diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h |
17 | index XXXXXXX..XXXXXXX 100644 | 26 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/accel/tcg/cputlb.c | 27 | --- a/include/exec/gen-icount.h |
19 | +++ b/accel/tcg/cputlb.c | 28 | +++ b/include/exec/gen-icount.h |
20 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_locked(CPUArchState *env, int midx, vaddr page) | 29 | @@ -XXX,XX +XXX,XX @@ |
21 | 30 | #ifndef GEN_ICOUNT_H | |
22 | /* Check if we need to flush due to large pages. */ | 31 | #define GEN_ICOUNT_H |
23 | if ((page & lp_mask) == lp_addr) { | 32 | |
24 | - tlb_debug("forcing full flush midx %d (%" | 33 | +#include "exec/exec-all.h" |
25 | - VADDR_PRIx "/%" VADDR_PRIx ")\n", | 34 | #include "qemu/timer.h" |
26 | + tlb_debug("forcing full flush midx %d (%016" | 35 | |
27 | + VADDR_PRIx "/%016" VADDR_PRIx ")\n", | 36 | /* Helpers for instruction counting code generation. */ |
28 | midx, lp_addr, lp_mask); | ||
29 | tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); | ||
30 | } else { | ||
31 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu, | ||
32 | |||
33 | assert_cpu_is_self(cpu); | ||
34 | |||
35 | - tlb_debug("page addr: %" VADDR_PRIx " mmu_map:0x%x\n", addr, idxmap); | ||
36 | + tlb_debug("page addr: %016" VADDR_PRIx " mmu_map:0x%x\n", addr, idxmap); | ||
37 | |||
38 | qemu_spin_lock(&env_tlb(env)->c.lock); | ||
39 | for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { | ||
40 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu, | ||
41 | |||
42 | void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr, uint16_t idxmap) | ||
43 | { | ||
44 | - tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap); | ||
45 | + tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap); | ||
46 | |||
47 | /* This should already be page aligned */ | ||
48 | addr &= TARGET_PAGE_MASK; | ||
49 | @@ -XXX,XX +XXX,XX @@ void tlb_flush_page(CPUState *cpu, vaddr addr) | ||
50 | void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, vaddr addr, | ||
51 | uint16_t idxmap) | ||
52 | { | ||
53 | - tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap); | ||
54 | + tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap); | ||
55 | |||
56 | /* This should already be page aligned */ | ||
57 | addr &= TARGET_PAGE_MASK; | ||
58 | @@ -XXX,XX +XXX,XX @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu, | ||
59 | vaddr addr, | ||
60 | uint16_t idxmap) | ||
61 | { | ||
62 | - tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap); | ||
63 | + tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap); | ||
64 | |||
65 | /* This should already be page aligned */ | ||
66 | addr &= TARGET_PAGE_MASK; | ||
67 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_locked(CPUArchState *env, int midx, | ||
68 | */ | ||
69 | if (mask < f->mask || len > f->mask) { | ||
70 | tlb_debug("forcing full flush midx %d (" | ||
71 | - "%" VADDR_PRIx "/%" VADDR_PRIx "+%" VADDR_PRIx ")\n", | ||
72 | + "%016" VADDR_PRIx "/%016" VADDR_PRIx "+%016" VADDR_PRIx ")\n", | ||
73 | midx, addr, mask, len); | ||
74 | tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); | ||
75 | return; | ||
76 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_locked(CPUArchState *env, int midx, | ||
77 | */ | ||
78 | if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) { | ||
79 | tlb_debug("forcing full flush midx %d (" | ||
80 | - "%" VADDR_PRIx "/%" VADDR_PRIx ")\n", | ||
81 | + "%016" VADDR_PRIx "/%016" VADDR_PRIx ")\n", | ||
82 | midx, d->large_page_addr, d->large_page_mask); | ||
83 | tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); | ||
84 | return; | ||
85 | @@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu, | ||
86 | |||
87 | assert_cpu_is_self(cpu); | ||
88 | |||
89 | - tlb_debug("range: %" VADDR_PRIx "/%u+%" VADDR_PRIx " mmu_map:0x%x\n", | ||
90 | + tlb_debug("range: %016" VADDR_PRIx "/%u+%016" VADDR_PRIx " mmu_map:0x%x\n", | ||
91 | d.addr, d.bits, d.len, d.idxmap); | ||
92 | |||
93 | qemu_spin_lock(&env_tlb(env)->c.lock); | ||
94 | @@ -XXX,XX +XXX,XX @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx, | ||
95 | &xlat, &sz, full->attrs, &prot); | ||
96 | assert(sz >= TARGET_PAGE_SIZE); | ||
97 | |||
98 | - tlb_debug("vaddr=%" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx | ||
99 | + tlb_debug("vaddr=%016" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx | ||
100 | " prot=%x idx=%d\n", | ||
101 | addr, full->phys_addr, prot, mmu_idx); | ||
102 | |||
103 | -- | 37 | -- |
104 | 2.34.1 | 38 | 2.25.1 |
39 | |||
40 | diff view generated by jsdifflib |
1 | From: Ilya Leoshkevich <iii@linux.ibm.com> | 1 | From: Matheus Ferst <matheus.ferst@eldorado.org.br> |
---|---|---|---|
2 | 2 | ||
3 | i386 and s390x implementations of op_add2 require an earlyclobber, | 3 | Used in ppc D/DS/X-form load/store implementation. |
4 | which is currently missing. This breaks VCKSM in s390x guests. E.g., on | ||
5 | x86_64 the following op: | ||
6 | 4 | ||
7 | add2_i32 tmp2,tmp3,tmp2,tmp3,tmp3,tmp2 dead: 0 2 3 4 5 pref=none,0xffff | 5 | Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br> |
8 | 6 | Message-Id: <20210512185441.3619828-24-matheus.ferst@eldorado.org.br> | |
9 | is translated to: | ||
10 | |||
11 | addl %ebx, %r12d | ||
12 | adcl %r12d, %ebx | ||
13 | |||
14 | Introduce a new C_N1_O1_I4 constraint, and make sure that earlyclobber | ||
15 | of aliased outputs is honored. | ||
16 | |||
17 | Cc: qemu-stable@nongnu.org | ||
18 | Fixes: 82790a870992 ("tcg: Add markup for output requires new register") | ||
19 | Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com> | ||
20 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
21 | Message-Id: <20230719221310.1968845-7-iii@linux.ibm.com> | ||
22 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
23 | --- | 8 | --- |
24 | tcg/i386/tcg-target-con-set.h | 5 ++++- | 9 | include/tcg/tcg-op.h | 2 ++ |
25 | tcg/s390x/tcg-target-con-set.h | 8 +++++--- | 10 | 1 file changed, 2 insertions(+) |
26 | tcg/tcg.c | 8 +++++++- | ||
27 | tcg/i386/tcg-target.c.inc | 2 +- | ||
28 | tcg/s390x/tcg-target.c.inc | 4 ++-- | ||
29 | 5 files changed, 19 insertions(+), 8 deletions(-) | ||
30 | 11 | ||
31 | diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h | 12 | diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h |
32 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
33 | --- a/tcg/i386/tcg-target-con-set.h | 14 | --- a/include/tcg/tcg-op.h |
34 | +++ b/tcg/i386/tcg-target-con-set.h | 15 | +++ b/include/tcg/tcg-op.h |
35 | @@ -XXX,XX +XXX,XX @@ | 16 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); |
36 | * | 17 | #define tcg_gen_sextract_tl tcg_gen_sextract_i64 |
37 | * C_N1_Im(...) defines a constraint set with 1 output and <m> inputs, | 18 | #define tcg_gen_extract2_tl tcg_gen_extract2_i64 |
38 | * except that the output must use a new register. | 19 | #define tcg_const_tl tcg_const_i64 |
39 | + * | 20 | +#define tcg_constant_tl tcg_constant_i64 |
40 | + * C_Nn_Om_Ik(...) defines a constraint set with <n + m> outputs and <k> | 21 | #define tcg_const_local_tl tcg_const_local_i64 |
41 | + * inputs, except that the first <n> outputs must use new registers. | 22 | #define tcg_gen_movcond_tl tcg_gen_movcond_i64 |
42 | */ | 23 | #define tcg_gen_add2_tl tcg_gen_add2_i64 |
43 | C_O0_I1(r) | 24 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); |
44 | C_O0_I2(L, L) | 25 | #define tcg_gen_sextract_tl tcg_gen_sextract_i32 |
45 | @@ -XXX,XX +XXX,XX @@ C_O2_I1(r, r, L) | 26 | #define tcg_gen_extract2_tl tcg_gen_extract2_i32 |
46 | C_O2_I2(a, d, a, r) | 27 | #define tcg_const_tl tcg_const_i32 |
47 | C_O2_I2(r, r, L, L) | 28 | +#define tcg_constant_tl tcg_constant_i32 |
48 | C_O2_I3(a, d, 0, 1, r) | 29 | #define tcg_const_local_tl tcg_const_local_i32 |
49 | -C_O2_I4(r, r, 0, 1, re, re) | 30 | #define tcg_gen_movcond_tl tcg_gen_movcond_i32 |
50 | +C_N1_O1_I4(r, r, 0, 1, re, re) | 31 | #define tcg_gen_add2_tl tcg_gen_add2_i32 |
51 | diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/tcg/s390x/tcg-target-con-set.h | ||
54 | +++ b/tcg/s390x/tcg-target-con-set.h | ||
55 | @@ -XXX,XX +XXX,XX @@ | ||
56 | * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs. | ||
57 | * Each operand should be a sequence of constraint letters as defined by | ||
58 | * tcg-target-con-str.h; the constraint combination is inclusive or. | ||
59 | + * | ||
60 | + * C_Nn_Om_Ik(...) defines a constraint set with <n + m> outputs and <k> | ||
61 | + * inputs, except that the first <n> outputs must use new registers. | ||
62 | */ | ||
63 | C_O0_I1(r) | ||
64 | C_O0_I2(r, r) | ||
65 | @@ -XXX,XX +XXX,XX @@ C_O2_I1(o, m, r) | ||
66 | C_O2_I2(o, m, 0, r) | ||
67 | C_O2_I2(o, m, r, r) | ||
68 | C_O2_I3(o, m, 0, 1, r) | ||
69 | -C_O2_I4(r, r, 0, 1, rA, r) | ||
70 | -C_O2_I4(r, r, 0, 1, ri, r) | ||
71 | -C_O2_I4(r, r, 0, 1, r, r) | ||
72 | +C_N1_O1_I4(r, r, 0, 1, ri, r) | ||
73 | +C_N1_O1_I4(r, r, 0, 1, rA, r) | ||
74 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/tcg/tcg.c | ||
77 | +++ b/tcg/tcg.c | ||
78 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1, | ||
79 | #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2), | ||
80 | #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3), | ||
81 | #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4), | ||
82 | +#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4), | ||
83 | |||
84 | typedef enum { | ||
85 | #include "tcg-target-con-set.h" | ||
86 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode); | ||
87 | #undef C_O2_I2 | ||
88 | #undef C_O2_I3 | ||
89 | #undef C_O2_I4 | ||
90 | +#undef C_N1_O1_I4 | ||
91 | |||
92 | /* Put all of the constraint sets into an array, indexed by the enum. */ | ||
93 | |||
94 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode); | ||
95 | #define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } }, | ||
96 | #define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } }, | ||
97 | #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } }, | ||
98 | +#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } }, | ||
99 | |||
100 | static const TCGTargetOpDef constraint_sets[] = { | ||
101 | #include "tcg-target-con-set.h" | ||
102 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef constraint_sets[] = { | ||
103 | #undef C_O2_I2 | ||
104 | #undef C_O2_I3 | ||
105 | #undef C_O2_I4 | ||
106 | +#undef C_N1_O1_I4 | ||
107 | |||
108 | /* Expand the enumerator to be returned from tcg_target_op_def(). */ | ||
109 | |||
110 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef constraint_sets[] = { | ||
111 | #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2) | ||
112 | #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3) | ||
113 | #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4) | ||
114 | +#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4) | ||
115 | |||
116 | #include "tcg-target.c.inc" | ||
117 | |||
118 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
119 | * dead after the instruction, we must allocate a new | ||
120 | * register and move it. | ||
121 | */ | ||
122 | - if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { | ||
123 | + if (temp_readonly(ts) || !IS_DEAD_ARG(i) | ||
124 | + || def->args_ct[arg_ct->alias_index].newreg) { | ||
125 | allocate_new_reg = true; | ||
126 | } else if (ts->val_type == TEMP_VAL_REG) { | ||
127 | /* | ||
128 | diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc | ||
129 | index XXXXXXX..XXXXXXX 100644 | ||
130 | --- a/tcg/i386/tcg-target.c.inc | ||
131 | +++ b/tcg/i386/tcg-target.c.inc | ||
132 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | ||
133 | case INDEX_op_add2_i64: | ||
134 | case INDEX_op_sub2_i32: | ||
135 | case INDEX_op_sub2_i64: | ||
136 | - return C_O2_I4(r, r, 0, 1, re, re); | ||
137 | + return C_N1_O1_I4(r, r, 0, 1, re, re); | ||
138 | |||
139 | case INDEX_op_ctz_i32: | ||
140 | case INDEX_op_ctz_i64: | ||
141 | diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc | ||
142 | index XXXXXXX..XXXXXXX 100644 | ||
143 | --- a/tcg/s390x/tcg-target.c.inc | ||
144 | +++ b/tcg/s390x/tcg-target.c.inc | ||
145 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | ||
146 | |||
147 | case INDEX_op_add2_i32: | ||
148 | case INDEX_op_sub2_i32: | ||
149 | - return C_O2_I4(r, r, 0, 1, ri, r); | ||
150 | + return C_N1_O1_I4(r, r, 0, 1, ri, r); | ||
151 | |||
152 | case INDEX_op_add2_i64: | ||
153 | case INDEX_op_sub2_i64: | ||
154 | - return C_O2_I4(r, r, 0, 1, rA, r); | ||
155 | + return C_N1_O1_I4(r, r, 0, 1, rA, r); | ||
156 | |||
157 | case INDEX_op_st_vec: | ||
158 | return C_O0_I2(v, r); | ||
159 | -- | 32 | -- |
160 | 2.34.1 | 33 | 2.25.1 |
34 | |||
35 | diff view generated by jsdifflib |
1 | For user-only, the probe for page writability may race with another | 1 | From: Philippe Mathieu-Daudé <f4bug@amsat.org> |
---|---|---|---|
2 | thread's mprotect. Take the mmap_lock around the operation. This | ||
3 | is still faster than the start/end_exclusive fallback. | ||
4 | 2 | ||
5 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | 3 | To better visualize the data dumped at the end of a TB, left-align it |
4 | (padding it with 0). Print ".long" instead of ".quad" on 32-bit hosts. | ||
5 | |||
6 | Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
7 | Message-Id: <20210515104202.241504-1-f4bug@amsat.org> | ||
8 | [rth: Split the qemu_log and print .long for 32-bit hosts.] | ||
6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
7 | --- | 10 | --- |
8 | accel/tcg/ldst_atomicity.c.inc | 32 ++++++++++++++++++-------------- | 11 | accel/tcg/translate-all.c | 11 +++++++++-- |
9 | 1 file changed, 18 insertions(+), 14 deletions(-) | 12 | 1 file changed, 9 insertions(+), 2 deletions(-) |
10 | 13 | ||
11 | diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc | 14 | diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c |
12 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/accel/tcg/ldst_atomicity.c.inc | 16 | --- a/accel/tcg/translate-all.c |
14 | +++ b/accel/tcg/ldst_atomicity.c.inc | 17 | +++ b/accel/tcg/translate-all.c |
15 | @@ -XXX,XX +XXX,XX @@ static uint64_t load_atomic8_or_exit(CPUArchState *env, uintptr_t ra, void *pv) | 18 | @@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu, |
16 | * another process, because the fallback start_exclusive solution | 19 | int i; |
17 | * provides no protection across processes. | 20 | qemu_log(" data: [size=%d]\n", data_size); |
18 | */ | 21 | for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) { |
19 | - if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) { | 22 | - qemu_log("0x%08" PRIxPTR ": .quad 0x%" TCG_PRIlx "\n", |
20 | - uint64_t *p = __builtin_assume_aligned(pv, 8); | 23 | - (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); |
21 | - return *p; | 24 | + if (sizeof(tcg_target_ulong) == 8) { |
22 | + WITH_MMAP_LOCK_GUARD() { | 25 | + qemu_log("0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n", |
23 | + if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) { | 26 | + (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); |
24 | + uint64_t *p = __builtin_assume_aligned(pv, 8); | 27 | + } else if (sizeof(tcg_target_ulong) == 4) { |
25 | + return *p; | 28 | + qemu_log("0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n", |
26 | + } | 29 | + (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); |
27 | } | 30 | + } else { |
28 | #endif | 31 | + qemu_build_not_reached(); |
29 | 32 | + } | |
30 | @@ -XXX,XX +XXX,XX @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv) | 33 | } |
31 | return atomic16_read_ro(p); | 34 | } |
32 | } | 35 | qemu_log("\n"); |
33 | |||
34 | -#ifdef CONFIG_USER_ONLY | ||
35 | /* | ||
36 | * We can only use cmpxchg to emulate a load if the page is writable. | ||
37 | * If the page is not writable, then assume the value is immutable | ||
38 | * and requires no locking. This ignores the case of MAP_SHARED with | ||
39 | * another process, because the fallback start_exclusive solution | ||
40 | * provides no protection across processes. | ||
41 | + * | ||
42 | + * In system mode all guest pages are writable. For user mode, | ||
43 | + * we must take mmap_lock so that the query remains valid until | ||
44 | + * the write is complete -- tests/tcg/multiarch/munmap-pthread.c | ||
45 | + * is an example that can race. | ||
46 | */ | ||
47 | - if (!page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) { | ||
48 | - return *p; | ||
49 | - } | ||
50 | + WITH_MMAP_LOCK_GUARD() { | ||
51 | +#ifdef CONFIG_USER_ONLY | ||
52 | + if (!page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) { | ||
53 | + return *p; | ||
54 | + } | ||
55 | #endif | ||
56 | - | ||
57 | - /* | ||
58 | - * In system mode all guest pages are writable, and for user-only | ||
59 | - * we have just checked writability. Try cmpxchg. | ||
60 | - */ | ||
61 | - if (HAVE_ATOMIC128_RW) { | ||
62 | - return atomic16_read_rw(p); | ||
63 | + if (HAVE_ATOMIC128_RW) { | ||
64 | + return atomic16_read_rw(p); | ||
65 | + } | ||
66 | } | ||
67 | |||
68 | /* Ultimate fallback: re-execute in serial context. */ | ||
69 | -- | 36 | -- |
70 | 2.34.1 | 37 | 2.25.1 |
38 | |||
39 | diff view generated by jsdifflib |