1 | The following changes since commit 67e41fe0cfb62e6cdfa659f0155417d17e5274ea: | 1 | The following changes since commit 7c18f2d663521f1b31b821a13358ce38075eaf7d: |
---|---|---|---|
2 | 2 | ||
3 | Merge tag 'pull-ppc-20220104' of https://github.com/legoater/qemu into staging (2022-01-04 07:23:27 -0800) | 3 | Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging (2023-04-29 23:07:17 +0100) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20220104 | 7 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230502 |
8 | 8 | ||
9 | for you to fetch changes up to d7478d4229f0a2b2817a55487e6b17081099fae4: | 9 | for you to fetch changes up to bdc7fba1c5a29ae218b45353daac9308fe1aae82: |
10 | 10 | ||
11 | common-user: Fix tail calls to safe_syscall_set_errno_tail (2022-01-04 15:41:03 -0800) | 11 | tcg: Introduce tcg_out_movext2 (2023-05-02 12:15:41 +0100) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Fix for safe_syscall_base. | 14 | Misc tcg-related patch queue. |
15 | Fix for folding of vector add/sub. | ||
16 | Fix build on loongarch64 with gcc 8. | ||
17 | Remove decl for qemu_run_machine_init_done_notifiers. | ||
18 | 15 | ||
19 | ---------------------------------------------------------------- | 16 | ---------------------------------------------------------------- |
20 | Philippe Mathieu-Daudé (1): | 17 | Dickon Hood (1): |
21 | linux-user: Fix trivial build error on loongarch64 hosts | 18 | qemu/bitops.h: Limit rotate amounts |
22 | 19 | ||
23 | Richard Henderson (2): | 20 | Kiran Ostrolenk (1): |
24 | tcg/optimize: Fix folding of vector ops | 21 | qemu/host-utils.h: Add clz and ctz functions for lower-bit integers |
25 | common-user: Fix tail calls to safe_syscall_set_errno_tail | ||
26 | 22 | ||
27 | Xiaoyao Li (1): | 23 | Nazar Kazakov (2): |
28 | sysemu: Cleanup qemu_run_machine_init_done_notifiers() | 24 | tcg: Add tcg_gen_gvec_andcs |
25 | tcg: Add tcg_gen_gvec_rotrs | ||
29 | 26 | ||
30 | include/sysemu/sysemu.h | 1 - | 27 | Richard Henderson (7): |
31 | linux-user/host/loongarch64/host-signal.h | 4 +-- | 28 | softmmu: Tidy dirtylimit_dirty_ring_full_time |
32 | tcg/optimize.c | 49 +++++++++++++++++++++++------- | 29 | qemu/int128: Re-shuffle Int128Alias members |
33 | common-user/host/i386/safe-syscall.inc.S | 1 + | 30 | migration/xbzrle: Use __attribute__((target)) for avx512 |
34 | common-user/host/mips/safe-syscall.inc.S | 1 + | 31 | accel/tcg: Add cpu_ld*_code_mmu |
35 | common-user/host/x86_64/safe-syscall.inc.S | 1 + | 32 | tcg/loongarch64: Conditionalize tcg_out_exts_i32_i64 |
36 | 6 files changed, 42 insertions(+), 15 deletions(-) | 33 | tcg/mips: Conditionalize tcg_out_exts_i32_i64 |
34 | tcg: Introduce tcg_out_movext2 | ||
37 | 35 | ||
36 | Weiwei Li (1): | ||
37 | accel/tcg: Uncache the host address for instruction fetch when tlb size < 1 | ||
38 | |||
39 | meson.build | 5 +-- | ||
40 | accel/tcg/tcg-runtime.h | 1 + | ||
41 | include/exec/cpu_ldst.h | 9 ++++++ | ||
42 | include/qemu/bitops.h | 24 +++++++++----- | ||
43 | include/qemu/host-utils.h | 54 +++++++++++++++++++++++++++++++ | ||
44 | include/qemu/int128.h | 4 +-- | ||
45 | include/tcg/tcg-op-gvec.h | 4 +++ | ||
46 | accel/tcg/cputlb.c | 53 ++++++++++++++++++++++++++++++ | ||
47 | accel/tcg/tcg-runtime-gvec.c | 11 +++++++ | ||
48 | accel/tcg/user-exec.c | 58 +++++++++++++++++++++++++++++++++ | ||
49 | migration/xbzrle.c | 9 +++--- | ||
50 | softmmu/dirtylimit.c | 15 ++++++--- | ||
51 | tcg/tcg-op-gvec.c | 28 ++++++++++++++++ | ||
52 | tcg/tcg.c | 69 +++++++++++++++++++++++++++++++++++++--- | ||
53 | tcg/arm/tcg-target.c.inc | 44 +++++++++++-------------- | ||
54 | tcg/i386/tcg-target.c.inc | 19 +++++------ | ||
55 | tcg/loongarch64/tcg-target.c.inc | 4 ++- | ||
56 | tcg/mips/tcg-target.c.inc | 4 ++- | ||
57 | 18 files changed, 347 insertions(+), 68 deletions(-) | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Drop inline marker: let compiler decide. | ||
1 | 2 | ||
3 | Change return type to uint64_t: this matches the computation in the | ||
4 | return statement and the local variable assignment in the caller. | ||
5 | |||
6 | Rename local to dirty_ring_size_MB to fix typo. | ||
7 | Simplify conversion to MiB via qemu_target_page_bits and right shift. | ||
8 | |||
9 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
10 | Reviewed-by: Thomas Huth <thuth@redhat.com> | ||
11 | Reviewed-by: Juan Quintela <quintela@redhat.com> | ||
12 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | --- | ||
14 | softmmu/dirtylimit.c | 15 ++++++++++----- | ||
15 | 1 file changed, 10 insertions(+), 5 deletions(-) | ||
16 | |||
17 | diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/softmmu/dirtylimit.c | ||
20 | +++ b/softmmu/dirtylimit.c | ||
21 | @@ -XXX,XX +XXX,XX @@ bool dirtylimit_vcpu_index_valid(int cpu_index) | ||
22 | cpu_index >= ms->smp.max_cpus); | ||
23 | } | ||
24 | |||
25 | -static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate) | ||
26 | +static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate) | ||
27 | { | ||
28 | static uint64_t max_dirtyrate; | ||
29 | - uint32_t dirty_ring_size = kvm_dirty_ring_size(); | ||
30 | - uint64_t dirty_ring_size_meory_MB = | ||
31 | - dirty_ring_size * qemu_target_page_size() >> 20; | ||
32 | + unsigned target_page_bits = qemu_target_page_bits(); | ||
33 | + uint64_t dirty_ring_size_MB; | ||
34 | + | ||
35 | + /* So far, the largest (non-huge) page size is 64k, i.e. 16 bits. */ | ||
36 | + assert(target_page_bits < 20); | ||
37 | + | ||
38 | + /* Convert ring size (pages) to MiB (2**20). */ | ||
39 | + dirty_ring_size_MB = kvm_dirty_ring_size() >> (20 - target_page_bits); | ||
40 | |||
41 | if (max_dirtyrate < dirtyrate) { | ||
42 | max_dirtyrate = dirtyrate; | ||
43 | } | ||
44 | |||
45 | - return dirty_ring_size_meory_MB * 1000000 / max_dirtyrate; | ||
46 | + return dirty_ring_size_MB * 1000000 / max_dirtyrate; | ||
47 | } | ||
48 | |||
49 | static inline bool dirtylimit_done(uint64_t quota, | ||
50 | -- | ||
51 | 2.34.1 | ||
52 | |||
53 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Weiwei Li <liweiwei@iscas.ac.cn> | ||
1 | 2 | ||
3 | When PMP entry overlap part of the page, we'll set the tlb_size to 1, which | ||
4 | will make the address in tlb entry set with TLB_INVALID_MASK, and the next | ||
5 | access will again go through tlb_fill.However, this way will not work in | ||
6 | tb_gen_code() => get_page_addr_code_hostp(): the TLB host address will be | ||
7 | cached, and the following instructions can use this host address directly | ||
8 | which may lead to the bypass of PMP related check. | ||
9 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1542. | ||
10 | |||
11 | Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn> | ||
12 | Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn> | ||
13 | Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com> | ||
14 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | Message-Id: <20230422130329.23555-6-liweiwei@iscas.ac.cn> | ||
16 | --- | ||
17 | accel/tcg/cputlb.c | 5 +++++ | ||
18 | 1 file changed, 5 insertions(+) | ||
19 | |||
20 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/accel/tcg/cputlb.c | ||
23 | +++ b/accel/tcg/cputlb.c | ||
24 | @@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, | ||
25 | if (p == NULL) { | ||
26 | return -1; | ||
27 | } | ||
28 | + | ||
29 | + if (full->lg_page_size < TARGET_PAGE_BITS) { | ||
30 | + return -1; | ||
31 | + } | ||
32 | + | ||
33 | if (hostp) { | ||
34 | *hostp = p; | ||
35 | } | ||
36 | -- | ||
37 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Dickon Hood <dickon.hood@codethink.co.uk> | ||
1 | 2 | ||
3 | Rotates have been fixed up to only allow for reasonable rotate amounts | ||
4 | (ie, no rotates >7 on an 8b value etc.) This fixes a problem with riscv | ||
5 | vector rotate instructions. | ||
6 | |||
7 | Signed-off-by: Dickon Hood <dickon.hood@codethink.co.uk> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-Id: <20230428144757.57530-9-lawrence.hunter@codethink.co.uk> | ||
10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | --- | ||
12 | include/qemu/bitops.h | 24 ++++++++++++++++-------- | ||
13 | 1 file changed, 16 insertions(+), 8 deletions(-) | ||
14 | |||
15 | diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/include/qemu/bitops.h | ||
18 | +++ b/include/qemu/bitops.h | ||
19 | @@ -XXX,XX +XXX,XX @@ static inline unsigned long find_first_zero_bit(const unsigned long *addr, | ||
20 | */ | ||
21 | static inline uint8_t rol8(uint8_t word, unsigned int shift) | ||
22 | { | ||
23 | - return (word << shift) | (word >> ((8 - shift) & 7)); | ||
24 | + shift &= 7; | ||
25 | + return (word << shift) | (word >> (8 - shift)); | ||
26 | } | ||
27 | |||
28 | /** | ||
29 | @@ -XXX,XX +XXX,XX @@ static inline uint8_t rol8(uint8_t word, unsigned int shift) | ||
30 | */ | ||
31 | static inline uint8_t ror8(uint8_t word, unsigned int shift) | ||
32 | { | ||
33 | - return (word >> shift) | (word << ((8 - shift) & 7)); | ||
34 | + shift &= 7; | ||
35 | + return (word >> shift) | (word << (8 - shift)); | ||
36 | } | ||
37 | |||
38 | /** | ||
39 | @@ -XXX,XX +XXX,XX @@ static inline uint8_t ror8(uint8_t word, unsigned int shift) | ||
40 | */ | ||
41 | static inline uint16_t rol16(uint16_t word, unsigned int shift) | ||
42 | { | ||
43 | - return (word << shift) | (word >> ((16 - shift) & 15)); | ||
44 | + shift &= 15; | ||
45 | + return (word << shift) | (word >> (16 - shift)); | ||
46 | } | ||
47 | |||
48 | /** | ||
49 | @@ -XXX,XX +XXX,XX @@ static inline uint16_t rol16(uint16_t word, unsigned int shift) | ||
50 | */ | ||
51 | static inline uint16_t ror16(uint16_t word, unsigned int shift) | ||
52 | { | ||
53 | - return (word >> shift) | (word << ((16 - shift) & 15)); | ||
54 | + shift &= 15; | ||
55 | + return (word >> shift) | (word << (16 - shift)); | ||
56 | } | ||
57 | |||
58 | /** | ||
59 | @@ -XXX,XX +XXX,XX @@ static inline uint16_t ror16(uint16_t word, unsigned int shift) | ||
60 | */ | ||
61 | static inline uint32_t rol32(uint32_t word, unsigned int shift) | ||
62 | { | ||
63 | - return (word << shift) | (word >> ((32 - shift) & 31)); | ||
64 | + shift &= 31; | ||
65 | + return (word << shift) | (word >> (32 - shift)); | ||
66 | } | ||
67 | |||
68 | /** | ||
69 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t rol32(uint32_t word, unsigned int shift) | ||
70 | */ | ||
71 | static inline uint32_t ror32(uint32_t word, unsigned int shift) | ||
72 | { | ||
73 | - return (word >> shift) | (word << ((32 - shift) & 31)); | ||
74 | + shift &= 31; | ||
75 | + return (word >> shift) | (word << (32 - shift)); | ||
76 | } | ||
77 | |||
78 | /** | ||
79 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t ror32(uint32_t word, unsigned int shift) | ||
80 | */ | ||
81 | static inline uint64_t rol64(uint64_t word, unsigned int shift) | ||
82 | { | ||
83 | - return (word << shift) | (word >> ((64 - shift) & 63)); | ||
84 | + shift &= 63; | ||
85 | + return (word << shift) | (word >> (64 - shift)); | ||
86 | } | ||
87 | |||
88 | /** | ||
89 | @@ -XXX,XX +XXX,XX @@ static inline uint64_t rol64(uint64_t word, unsigned int shift) | ||
90 | */ | ||
91 | static inline uint64_t ror64(uint64_t word, unsigned int shift) | ||
92 | { | ||
93 | - return (word >> shift) | (word << ((64 - shift) & 63)); | ||
94 | + shift &= 63; | ||
95 | + return (word >> shift) | (word << (64 - shift)); | ||
96 | } | ||
97 | |||
98 | /** | ||
99 | -- | ||
100 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> | ||
1 | 2 | ||
3 | This is for use in the RISC-V vclz and vctz instructions (implemented in | ||
4 | proceeding commit). | ||
5 | |||
6 | Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-Id: <20230428144757.57530-11-lawrence.hunter@codethink.co.uk> | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | ||
11 | include/qemu/host-utils.h | 54 +++++++++++++++++++++++++++++++++++++++ | ||
12 | 1 file changed, 54 insertions(+) | ||
13 | |||
14 | diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/include/qemu/host-utils.h | ||
17 | +++ b/include/qemu/host-utils.h | ||
18 | @@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) | ||
19 | } | ||
20 | #endif | ||
21 | |||
22 | +/** | ||
23 | + * clz8 - count leading zeros in a 8-bit value. | ||
24 | + * @val: The value to search | ||
25 | + * | ||
26 | + * Returns 8 if the value is zero. Note that the GCC builtin is | ||
27 | + * undefined if the value is zero. | ||
28 | + * | ||
29 | + * Note that the GCC builtin will upcast its argument to an `unsigned int` | ||
30 | + * so this function subtracts off the number of prepended zeroes. | ||
31 | + */ | ||
32 | +static inline int clz8(uint8_t val) | ||
33 | +{ | ||
34 | + return val ? __builtin_clz(val) - 24 : 8; | ||
35 | +} | ||
36 | + | ||
37 | +/** | ||
38 | + * clz16 - count leading zeros in a 16-bit value. | ||
39 | + * @val: The value to search | ||
40 | + * | ||
41 | + * Returns 16 if the value is zero. Note that the GCC builtin is | ||
42 | + * undefined if the value is zero. | ||
43 | + * | ||
44 | + * Note that the GCC builtin will upcast its argument to an `unsigned int` | ||
45 | + * so this function subtracts off the number of prepended zeroes. | ||
46 | + */ | ||
47 | +static inline int clz16(uint16_t val) | ||
48 | +{ | ||
49 | + return val ? __builtin_clz(val) - 16 : 16; | ||
50 | +} | ||
51 | + | ||
52 | /** | ||
53 | * clz32 - count leading zeros in a 32-bit value. | ||
54 | * @val: The value to search | ||
55 | @@ -XXX,XX +XXX,XX @@ static inline int clo64(uint64_t val) | ||
56 | return clz64(~val); | ||
57 | } | ||
58 | |||
59 | +/** | ||
60 | + * ctz8 - count trailing zeros in a 8-bit value. | ||
61 | + * @val: The value to search | ||
62 | + * | ||
63 | + * Returns 8 if the value is zero. Note that the GCC builtin is | ||
64 | + * undefined if the value is zero. | ||
65 | + */ | ||
66 | +static inline int ctz8(uint8_t val) | ||
67 | +{ | ||
68 | + return val ? __builtin_ctz(val) : 8; | ||
69 | +} | ||
70 | + | ||
71 | +/** | ||
72 | + * ctz16 - count trailing zeros in a 16-bit value. | ||
73 | + * @val: The value to search | ||
74 | + * | ||
75 | + * Returns 16 if the value is zero. Note that the GCC builtin is | ||
76 | + * undefined if the value is zero. | ||
77 | + */ | ||
78 | +static inline int ctz16(uint16_t val) | ||
79 | +{ | ||
80 | + return val ? __builtin_ctz(val) : 16; | ||
81 | +} | ||
82 | + | ||
83 | /** | ||
84 | * ctz32 - count trailing zeros in a 32-bit value. | ||
85 | * @val: The value to search | ||
86 | -- | ||
87 | 2.34.1 | diff view generated by jsdifflib |
1 | Bitwise operations are easy to fold, because the operation is | 1 | From: Nazar Kazakov <nazar.kazakov@codethink.co.uk> |
---|---|---|---|
2 | identical regardless of element size. But add and sub need | ||
3 | extra element size info that is not currently propagated. | ||
4 | 2 | ||
5 | Fixes: 2f9f08ba43d | 3 | Add tcg expander and helper functions for and-compliment |
6 | Cc: qemu-stable@nongnu.org | 4 | vector with scalar operand. |
7 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/799 | 5 | |
8 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 6 | Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> |
7 | Message-Id: <20230428144757.57530-10-lawrence.hunter@codethink.co.uk> | ||
8 | [rth: Split out of larger patch.] | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
10 | --- | 10 | --- |
11 | tcg/optimize.c | 49 ++++++++++++++++++++++++++++++++++++++----------- | 11 | accel/tcg/tcg-runtime.h | 1 + |
12 | 1 file changed, 38 insertions(+), 11 deletions(-) | 12 | include/tcg/tcg-op-gvec.h | 2 ++ |
13 | accel/tcg/tcg-runtime-gvec.c | 11 +++++++++++ | ||
14 | tcg/tcg-op-gvec.c | 17 +++++++++++++++++ | ||
15 | 4 files changed, 31 insertions(+) | ||
13 | 16 | ||
14 | diff --git a/tcg/optimize.c b/tcg/optimize.c | 17 | diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h |
15 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/tcg/optimize.c | 19 | --- a/accel/tcg/tcg-runtime.h |
17 | +++ b/tcg/optimize.c | 20 | +++ b/accel/tcg/tcg-runtime.h |
18 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) | 21 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_nor, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) |
19 | CASE_OP_32_64(mul): | 22 | DEF_HELPER_FLAGS_4(gvec_eqv, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) |
20 | return x * y; | 23 | |
21 | 24 | DEF_HELPER_FLAGS_4(gvec_ands, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | |
22 | - CASE_OP_32_64(and): | 25 | +DEF_HELPER_FLAGS_4(gvec_andcs, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) |
23 | + CASE_OP_32_64_VEC(and): | 26 | DEF_HELPER_FLAGS_4(gvec_xors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) |
24 | return x & y; | 27 | DEF_HELPER_FLAGS_4(gvec_ors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) |
25 | 28 | ||
26 | - CASE_OP_32_64(or): | 29 | diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h |
27 | + CASE_OP_32_64_VEC(or): | 30 | index XXXXXXX..XXXXXXX 100644 |
28 | return x | y; | 31 | --- a/include/tcg/tcg-op-gvec.h |
29 | 32 | +++ b/include/tcg/tcg-op-gvec.h | |
30 | - CASE_OP_32_64(xor): | 33 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_ori(unsigned vece, uint32_t dofs, uint32_t aofs, |
31 | + CASE_OP_32_64_VEC(xor): | 34 | |
32 | return x ^ y; | 35 | void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs, |
33 | 36 | TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); | |
34 | case INDEX_op_shl_i32: | 37 | +void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs, |
35 | @@ -XXX,XX +XXX,XX @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) | 38 | + TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); |
36 | case INDEX_op_rotl_i64: | 39 | void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs, |
37 | return rol64(x, y & 63); | 40 | TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); |
38 | 41 | void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs, | |
39 | - CASE_OP_32_64(not): | 42 | diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c |
40 | + CASE_OP_32_64_VEC(not): | 43 | index XXXXXXX..XXXXXXX 100644 |
41 | return ~x; | 44 | --- a/accel/tcg/tcg-runtime-gvec.c |
42 | 45 | +++ b/accel/tcg/tcg-runtime-gvec.c | |
43 | CASE_OP_32_64(neg): | 46 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc) |
44 | return -x; | 47 | clear_high(d, oprsz, desc); |
45 | |||
46 | - CASE_OP_32_64(andc): | ||
47 | + CASE_OP_32_64_VEC(andc): | ||
48 | return x & ~y; | ||
49 | |||
50 | - CASE_OP_32_64(orc): | ||
51 | + CASE_OP_32_64_VEC(orc): | ||
52 | return x | ~y; | ||
53 | |||
54 | CASE_OP_32_64(eqv): | ||
55 | @@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op) | ||
56 | return false; | ||
57 | } | 48 | } |
58 | 49 | ||
59 | +static bool fold_commutative(OptContext *ctx, TCGOp *op) | 50 | +void HELPER(gvec_andcs)(void *d, void *a, uint64_t b, uint32_t desc) |
60 | +{ | 51 | +{ |
61 | + swap_commutative(op->args[0], &op->args[1], &op->args[2]); | 52 | + intptr_t oprsz = simd_oprsz(desc); |
62 | + return false; | 53 | + intptr_t i; |
54 | + | ||
55 | + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { | ||
56 | + *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & ~b; | ||
57 | + } | ||
58 | + clear_high(d, oprsz, desc); | ||
63 | +} | 59 | +} |
64 | + | 60 | + |
65 | static bool fold_const2_commutative(OptContext *ctx, TCGOp *op) | 61 | void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc) |
66 | { | 62 | { |
67 | swap_commutative(op->args[0], &op->args[1], &op->args[2]); | 63 | intptr_t oprsz = simd_oprsz(desc); |
68 | @@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op) | 64 | diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c |
69 | return false; | 65 | index XXXXXXX..XXXXXXX 100644 |
66 | --- a/tcg/tcg-op-gvec.c | ||
67 | +++ b/tcg/tcg-op-gvec.c | ||
68 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_andi(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
69 | tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ands); | ||
70 | } | 70 | } |
71 | 71 | ||
72 | +/* We cannot as yet do_constant_folding with vectors. */ | 72 | +void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs, |
73 | +static bool fold_add_vec(OptContext *ctx, TCGOp *op) | 73 | + TCGv_i64 c, uint32_t oprsz, uint32_t maxsz) |
74 | +{ | 74 | +{ |
75 | + if (fold_commutative(ctx, op) || | 75 | + static GVecGen2s g = { |
76 | + fold_xi_to_x(ctx, op, 0)) { | 76 | + .fni8 = tcg_gen_andc_i64, |
77 | + return true; | 77 | + .fniv = tcg_gen_andc_vec, |
78 | + } | 78 | + .fno = gen_helper_gvec_andcs, |
79 | + return false; | 79 | + .prefer_i64 = TCG_TARGET_REG_BITS == 64, |
80 | + .vece = MO_64 | ||
81 | + }; | ||
82 | + | ||
83 | + TCGv_i64 tmp = tcg_temp_ebb_new_i64(); | ||
84 | + tcg_gen_dup_i64(vece, tmp, c); | ||
85 | + tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &g); | ||
86 | + tcg_temp_free_i64(tmp); | ||
80 | +} | 87 | +} |
81 | + | 88 | + |
82 | static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add) | 89 | static const GVecGen2s gop_xors = { |
83 | { | 90 | .fni8 = tcg_gen_xor_i64, |
84 | if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) && | 91 | .fniv = tcg_gen_xor_vec, |
85 | @@ -XXX,XX +XXX,XX @@ static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op) | ||
86 | return false; | ||
87 | } | ||
88 | |||
89 | -static bool fold_sub(OptContext *ctx, TCGOp *op) | ||
90 | +/* We cannot as yet do_constant_folding with vectors. */ | ||
91 | +static bool fold_sub_vec(OptContext *ctx, TCGOp *op) | ||
92 | { | ||
93 | - if (fold_const2(ctx, op) || | ||
94 | - fold_xx_to_i(ctx, op, 0) || | ||
95 | + if (fold_xx_to_i(ctx, op, 0) || | ||
96 | fold_xi_to_x(ctx, op, 0) || | ||
97 | fold_sub_to_neg(ctx, op)) { | ||
98 | return true; | ||
99 | @@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op) | ||
100 | return false; | ||
101 | } | ||
102 | |||
103 | +static bool fold_sub(OptContext *ctx, TCGOp *op) | ||
104 | +{ | ||
105 | + return fold_const2(ctx, op) || fold_sub_vec(ctx, op); | ||
106 | +} | ||
107 | + | ||
108 | static bool fold_sub2(OptContext *ctx, TCGOp *op) | ||
109 | { | ||
110 | return fold_addsub2(ctx, op, false); | ||
111 | @@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s) | ||
112 | * Sorted alphabetically by opcode as much as possible. | ||
113 | */ | ||
114 | switch (opc) { | ||
115 | - CASE_OP_32_64_VEC(add): | ||
116 | + CASE_OP_32_64(add): | ||
117 | done = fold_add(&ctx, op); | ||
118 | break; | ||
119 | + case INDEX_op_add_vec: | ||
120 | + done = fold_add_vec(&ctx, op); | ||
121 | + break; | ||
122 | CASE_OP_32_64(add2): | ||
123 | done = fold_add2(&ctx, op); | ||
124 | break; | ||
125 | @@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s) | ||
126 | CASE_OP_32_64(sextract): | ||
127 | done = fold_sextract(&ctx, op); | ||
128 | break; | ||
129 | - CASE_OP_32_64_VEC(sub): | ||
130 | + CASE_OP_32_64(sub): | ||
131 | done = fold_sub(&ctx, op); | ||
132 | break; | ||
133 | + case INDEX_op_sub_vec: | ||
134 | + done = fold_sub_vec(&ctx, op); | ||
135 | + break; | ||
136 | CASE_OP_32_64(sub2): | ||
137 | done = fold_sub2(&ctx, op); | ||
138 | break; | ||
139 | -- | 92 | -- |
140 | 2.25.1 | 93 | 2.34.1 |
141 | |||
142 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
1 | 2 | ||
3 | Add tcg expander and helper functions for rotate right | ||
4 | vector with scalar operand. | ||
5 | |||
6 | Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
7 | Message-Id: <20230428144757.57530-10-lawrence.hunter@codethink.co.uk> | ||
8 | [rth: Split out of larger patch; mask rotation count.] | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | ||
11 | include/tcg/tcg-op-gvec.h | 2 ++ | ||
12 | tcg/tcg-op-gvec.c | 11 +++++++++++ | ||
13 | 2 files changed, 13 insertions(+) | ||
14 | |||
15 | diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/include/tcg/tcg-op-gvec.h | ||
18 | +++ b/include/tcg/tcg-op-gvec.h | ||
19 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
20 | TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); | ||
21 | void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
22 | TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); | ||
23 | +void tcg_gen_gvec_rotrs(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
24 | + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); | ||
25 | |||
26 | /* | ||
27 | * Perform vector shift by vector element, modulo the element size. | ||
28 | diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/tcg/tcg-op-gvec.c | ||
31 | +++ b/tcg/tcg-op-gvec.c | ||
32 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
33 | do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g); | ||
34 | } | ||
35 | |||
36 | +void tcg_gen_gvec_rotrs(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
37 | + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz) | ||
38 | +{ | ||
39 | + TCGv_i32 tmp = tcg_temp_ebb_new_i32(); | ||
40 | + | ||
41 | + tcg_gen_neg_i32(tmp, shift); | ||
42 | + tcg_gen_andi_i32(tmp, tmp, (8 << vece) - 1); | ||
43 | + tcg_gen_gvec_rotls(vece, dofs, aofs, tmp, oprsz, maxsz); | ||
44 | + tcg_temp_free_i32(tmp); | ||
45 | +} | ||
46 | + | ||
47 | /* | ||
48 | * Expand D = A << (B % element bits) | ||
49 | * | ||
50 | -- | ||
51 | 2.34.1 | diff view generated by jsdifflib |
1 | For the ABIs in which the syscall return register is not | 1 | Clang 14, with --enable-tcg-interpreter errors with |
---|---|---|---|
2 | also the first function argument register, move the errno | ||
3 | value into the correct place. | ||
4 | 2 | ||
5 | Fixes: a3310c0397e2 ("linux-user: Move syscall error detection into safe_syscall_base") | 3 | include/qemu/int128.h:487:16: error: alignment of field 'i' (128 bits) |
6 | Reported-by: Laurent Vivier <laurent@vivier.eu> | 4 | does not match the alignment of the first field in transparent union; |
7 | Tested-by: Laurent Vivier <laurent@vivier.eu> | 5 | transparent_union attribute ignored [-Werror,-Wignored-attributes] |
8 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 6 | __int128_t i; |
7 | ^ | ||
8 | include/qemu/int128.h:486:12: note: alignment of first field is 64 bits | ||
9 | Int128 s; | ||
10 | ^ | ||
11 | 1 error generated. | ||
12 | |||
13 | By placing the __uint128_t member first, this is avoided. | ||
14 | |||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 15 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
10 | Message-Id: <20220104190454.542225-1-richard.henderson@linaro.org> | 16 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> |
17 | Message-Id: <20230501204625.277361-1-richard.henderson@linaro.org> | ||
11 | --- | 18 | --- |
12 | common-user/host/i386/safe-syscall.inc.S | 1 + | 19 | include/qemu/int128.h | 4 ++-- |
13 | common-user/host/mips/safe-syscall.inc.S | 1 + | 20 | 1 file changed, 2 insertions(+), 2 deletions(-) |
14 | common-user/host/x86_64/safe-syscall.inc.S | 1 + | ||
15 | 3 files changed, 3 insertions(+) | ||
16 | 21 | ||
17 | diff --git a/common-user/host/i386/safe-syscall.inc.S b/common-user/host/i386/safe-syscall.inc.S | 22 | diff --git a/include/qemu/int128.h b/include/qemu/int128.h |
18 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/common-user/host/i386/safe-syscall.inc.S | 24 | --- a/include/qemu/int128.h |
20 | +++ b/common-user/host/i386/safe-syscall.inc.S | 25 | +++ b/include/qemu/int128.h |
21 | @@ -XXX,XX +XXX,XX @@ safe_syscall_end: | 26 | @@ -XXX,XX +XXX,XX @@ static inline void bswap128s(Int128 *s) |
22 | pop %ebp | 27 | */ |
23 | .cfi_adjust_cfa_offset -4 | 28 | #ifdef CONFIG_INT128 |
24 | .cfi_restore ebp | 29 | typedef union { |
25 | + mov %eax, (%esp) | 30 | - Int128 s; |
26 | jmp safe_syscall_set_errno_tail | 31 | - __int128_t i; |
27 | 32 | __uint128_t u; | |
28 | .cfi_endproc | 33 | + __int128_t i; |
29 | diff --git a/common-user/host/mips/safe-syscall.inc.S b/common-user/host/mips/safe-syscall.inc.S | 34 | + Int128 s; |
30 | index XXXXXXX..XXXXXXX 100644 | 35 | } Int128Alias __attribute__((transparent_union)); |
31 | --- a/common-user/host/mips/safe-syscall.inc.S | 36 | #else |
32 | +++ b/common-user/host/mips/safe-syscall.inc.S | 37 | typedef Int128 Int128Alias; |
33 | @@ -XXX,XX +XXX,XX @@ safe_syscall_end: | ||
34 | 1: USE_ALT_CP(t0) | ||
35 | SETUP_GPX(t1) | ||
36 | SETUP_GPX64(t0, t1) | ||
37 | + move a0, v0 | ||
38 | PTR_LA t9, safe_syscall_set_errno_tail | ||
39 | jr t9 | ||
40 | |||
41 | diff --git a/common-user/host/x86_64/safe-syscall.inc.S b/common-user/host/x86_64/safe-syscall.inc.S | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/common-user/host/x86_64/safe-syscall.inc.S | ||
44 | +++ b/common-user/host/x86_64/safe-syscall.inc.S | ||
45 | @@ -XXX,XX +XXX,XX @@ safe_syscall_end: | ||
46 | 1: pop %rbp | ||
47 | .cfi_def_cfa_offset 8 | ||
48 | .cfi_restore rbp | ||
49 | + mov %eax, %edi | ||
50 | jmp safe_syscall_set_errno_tail | ||
51 | .cfi_endproc | ||
52 | |||
53 | -- | 38 | -- |
54 | 2.25.1 | 39 | 2.34.1 |
55 | 40 | ||
56 | 41 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Use the attribute, which is supported by clang, instead of | ||
2 | the #pragma, which is not supported and, for some reason, | ||
3 | also not detected by the meson probe, so we fail by -Werror. | ||
1 | 4 | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Reviewed-by: Juan Quintela <quintela@redhat.com> | ||
7 | Message-Id: <20230501210555.289806-1-richard.henderson@linaro.org> | ||
8 | --- | ||
9 | meson.build | 5 +---- | ||
10 | migration/xbzrle.c | 9 ++++----- | ||
11 | 2 files changed, 5 insertions(+), 9 deletions(-) | ||
12 | |||
13 | diff --git a/meson.build b/meson.build | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/meson.build | ||
16 | +++ b/meson.build | ||
17 | @@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \ | ||
18 | config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \ | ||
19 | .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512BW') \ | ||
20 | .require(cc.links(''' | ||
21 | - #pragma GCC push_options | ||
22 | - #pragma GCC target("avx512bw") | ||
23 | #include <cpuid.h> | ||
24 | #include <immintrin.h> | ||
25 | - static int bar(void *a) { | ||
26 | - | ||
27 | + static int __attribute__((target("avx512bw"))) bar(void *a) { | ||
28 | __m512i *x = a; | ||
29 | __m512i res= _mm512_abs_epi8(*x); | ||
30 | return res[1]; | ||
31 | diff --git a/migration/xbzrle.c b/migration/xbzrle.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/migration/xbzrle.c | ||
34 | +++ b/migration/xbzrle.c | ||
35 | @@ -XXX,XX +XXX,XX @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen) | ||
36 | } | ||
37 | |||
38 | #if defined(CONFIG_AVX512BW_OPT) | ||
39 | -#pragma GCC push_options | ||
40 | -#pragma GCC target("avx512bw") | ||
41 | #include <immintrin.h> | ||
42 | -int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen, | ||
43 | - uint8_t *dst, int dlen) | ||
44 | + | ||
45 | +int __attribute__((target("avx512bw"))) | ||
46 | +xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen, | ||
47 | + uint8_t *dst, int dlen) | ||
48 | { | ||
49 | uint32_t zrun_len = 0, nzrun_len = 0; | ||
50 | int d = 0, i = 0, num = 0; | ||
51 | @@ -XXX,XX +XXX,XX @@ int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen, | ||
52 | } | ||
53 | return d; | ||
54 | } | ||
55 | -#pragma GCC pop_options | ||
56 | #endif | ||
57 | -- | ||
58 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | At least RISC-V has the need to be able to perform a read | ||
2 | using execute permissions, outside of translation. | ||
3 | Add helpers to facilitate this. | ||
1 | 4 | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | ||
7 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | ||
8 | Tested-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
9 | Message-Id: <20230325105429.1142530-9-richard.henderson@linaro.org> | ||
10 | Message-Id: <20230412114333.118895-9-richard.henderson@linaro.org> | ||
11 | --- | ||
12 | include/exec/cpu_ldst.h | 9 +++++++ | ||
13 | accel/tcg/cputlb.c | 48 ++++++++++++++++++++++++++++++++++ | ||
14 | accel/tcg/user-exec.c | 58 +++++++++++++++++++++++++++++++++++++++++ | ||
15 | 3 files changed, 115 insertions(+) | ||
16 | |||
17 | diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/include/exec/cpu_ldst.h | ||
20 | +++ b/include/exec/cpu_ldst.h | ||
21 | @@ -XXX,XX +XXX,XX @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, | ||
22 | # define cpu_stq_mmu cpu_stq_le_mmu | ||
23 | #endif | ||
24 | |||
25 | +uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr, | ||
26 | + MemOpIdx oi, uintptr_t ra); | ||
27 | +uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr, | ||
28 | + MemOpIdx oi, uintptr_t ra); | ||
29 | +uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr, | ||
30 | + MemOpIdx oi, uintptr_t ra); | ||
31 | +uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr, | ||
32 | + MemOpIdx oi, uintptr_t ra); | ||
33 | + | ||
34 | uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr); | ||
35 | uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr); | ||
36 | uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr); | ||
37 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/accel/tcg/cputlb.c | ||
40 | +++ b/accel/tcg/cputlb.c | ||
41 | @@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr) | ||
42 | MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true)); | ||
43 | return full_ldq_code(env, addr, oi, 0); | ||
44 | } | ||
45 | + | ||
46 | +uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr, | ||
47 | + MemOpIdx oi, uintptr_t retaddr) | ||
48 | +{ | ||
49 | + return full_ldub_code(env, addr, oi, retaddr); | ||
50 | +} | ||
51 | + | ||
52 | +uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr, | ||
53 | + MemOpIdx oi, uintptr_t retaddr) | ||
54 | +{ | ||
55 | + MemOp mop = get_memop(oi); | ||
56 | + int idx = get_mmuidx(oi); | ||
57 | + uint16_t ret; | ||
58 | + | ||
59 | + ret = full_lduw_code(env, addr, make_memop_idx(MO_TEUW, idx), retaddr); | ||
60 | + if ((mop & MO_BSWAP) != MO_TE) { | ||
61 | + ret = bswap16(ret); | ||
62 | + } | ||
63 | + return ret; | ||
64 | +} | ||
65 | + | ||
66 | +uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr, | ||
67 | + MemOpIdx oi, uintptr_t retaddr) | ||
68 | +{ | ||
69 | + MemOp mop = get_memop(oi); | ||
70 | + int idx = get_mmuidx(oi); | ||
71 | + uint32_t ret; | ||
72 | + | ||
73 | + ret = full_ldl_code(env, addr, make_memop_idx(MO_TEUL, idx), retaddr); | ||
74 | + if ((mop & MO_BSWAP) != MO_TE) { | ||
75 | + ret = bswap32(ret); | ||
76 | + } | ||
77 | + return ret; | ||
78 | +} | ||
79 | + | ||
80 | +uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr, | ||
81 | + MemOpIdx oi, uintptr_t retaddr) | ||
82 | +{ | ||
83 | + MemOp mop = get_memop(oi); | ||
84 | + int idx = get_mmuidx(oi); | ||
85 | + uint64_t ret; | ||
86 | + | ||
87 | + ret = full_ldq_code(env, addr, make_memop_idx(MO_TEUQ, idx), retaddr); | ||
88 | + if ((mop & MO_BSWAP) != MO_TE) { | ||
89 | + ret = bswap64(ret); | ||
90 | + } | ||
91 | + return ret; | ||
92 | +} | ||
93 | diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/accel/tcg/user-exec.c | ||
96 | +++ b/accel/tcg/user-exec.c | ||
97 | @@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr) | ||
98 | return ret; | ||
99 | } | ||
100 | |||
101 | +uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr, | ||
102 | + MemOpIdx oi, uintptr_t ra) | ||
103 | +{ | ||
104 | + void *haddr; | ||
105 | + uint8_t ret; | ||
106 | + | ||
107 | + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH); | ||
108 | + ret = ldub_p(haddr); | ||
109 | + clear_helper_retaddr(); | ||
110 | + return ret; | ||
111 | +} | ||
112 | + | ||
113 | +uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr, | ||
114 | + MemOpIdx oi, uintptr_t ra) | ||
115 | +{ | ||
116 | + void *haddr; | ||
117 | + uint16_t ret; | ||
118 | + | ||
119 | + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH); | ||
120 | + ret = lduw_p(haddr); | ||
121 | + clear_helper_retaddr(); | ||
122 | + if (get_memop(oi) & MO_BSWAP) { | ||
123 | + ret = bswap16(ret); | ||
124 | + } | ||
125 | + return ret; | ||
126 | +} | ||
127 | + | ||
128 | +uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr, | ||
129 | + MemOpIdx oi, uintptr_t ra) | ||
130 | +{ | ||
131 | + void *haddr; | ||
132 | + uint32_t ret; | ||
133 | + | ||
134 | + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH); | ||
135 | + ret = ldl_p(haddr); | ||
136 | + clear_helper_retaddr(); | ||
137 | + if (get_memop(oi) & MO_BSWAP) { | ||
138 | + ret = bswap32(ret); | ||
139 | + } | ||
140 | + return ret; | ||
141 | +} | ||
142 | + | ||
143 | +uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr, | ||
144 | + MemOpIdx oi, uintptr_t ra) | ||
145 | +{ | ||
146 | + void *haddr; | ||
147 | + uint64_t ret; | ||
148 | + | ||
149 | + validate_memop(oi, MO_BEUQ); | ||
150 | + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD); | ||
151 | + ret = ldq_p(haddr); | ||
152 | + clear_helper_retaddr(); | ||
153 | + if (get_memop(oi) & MO_BSWAP) { | ||
154 | + ret = bswap64(ret); | ||
155 | + } | ||
156 | + return ret; | ||
157 | +} | ||
158 | + | ||
159 | #include "ldst_common.c.inc" | ||
160 | |||
161 | /* | ||
162 | -- | ||
163 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Xiaoyao Li <xiaoyao.li@intel.com> | 1 | Since TCG_TYPE_I32 values are kept sign-extended in registers, |
---|---|---|---|
2 | via ".w" instructions, we need not extend if the register matches. | ||
3 | This is already relied upon by comparisons. | ||
2 | 4 | ||
3 | Remove qemu_run_machine_init_done_notifiers() since no implementation | 5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
4 | and user. | ||
5 | |||
6 | Fixes: f66dc8737c9 ("vl: move all generic initialization out of vl.c") | ||
7 | Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com> | ||
8 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
9 | Message-Id: <20220104024136.1433545-1-xiaoyao.li@intel.com> | ||
10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
11 | --- | 7 | --- |
12 | include/sysemu/sysemu.h | 1 - | 8 | tcg/loongarch64/tcg-target.c.inc | 4 +++- |
13 | 1 file changed, 1 deletion(-) | 9 | 1 file changed, 3 insertions(+), 1 deletion(-) |
14 | 10 | ||
15 | diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h | 11 | diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc |
16 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/include/sysemu/sysemu.h | 13 | --- a/tcg/loongarch64/tcg-target.c.inc |
18 | +++ b/include/sysemu/sysemu.h | 14 | +++ b/tcg/loongarch64/tcg-target.c.inc |
19 | @@ -XXX,XX +XXX,XX @@ extern bool qemu_uuid_set; | 15 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg) |
20 | void qemu_add_exit_notifier(Notifier *notify); | 16 | |
21 | void qemu_remove_exit_notifier(Notifier *notify); | 17 | static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg) |
22 | 18 | { | |
23 | -void qemu_run_machine_init_done_notifiers(void); | 19 | - tcg_out_ext32s(s, ret, arg); |
24 | void qemu_add_machine_init_done_notifier(Notifier *notify); | 20 | + if (ret != arg) { |
25 | void qemu_remove_machine_init_done_notifier(Notifier *notify); | 21 | + tcg_out_ext32s(s, ret, arg); |
26 | 22 | + } | |
23 | } | ||
24 | |||
25 | static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg) | ||
27 | -- | 26 | -- |
28 | 2.25.1 | 27 | 2.34.1 |
29 | 28 | ||
30 | 29 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <f4bug@amsat.org> | 1 | Since TCG_TYPE_I32 values are kept sign-extended in registers, we need not |
---|---|---|---|
2 | extend if the register matches. This is already relied upon by comparisons. | ||
2 | 3 | ||
3 | When building using GCC 8.3.0 on loongarch64 (Loongnix) we get: | 4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
4 | |||
5 | In file included from ../linux-user/signal.c:33: | ||
6 | ../linux-user/host/loongarch64/host-signal.h: In function ‘host_signal_write’: | ||
7 | ../linux-user/host/loongarch64/host-signal.h:57:9: error: a label can only be part of a statement and a declaration is not a statement | ||
8 | uint32_t sel = (insn >> 15) & 0b11111111111; | ||
9 | ^~~~~~~~ | ||
10 | |||
11 | We don't use the 'sel' variable more than once, so drop it. | ||
12 | |||
13 | Meson output for the record: | ||
14 | |||
15 | Host machine cpu family: loongarch64 | ||
16 | Host machine cpu: loongarch64 | ||
17 | C compiler for the host machine: cc (gcc 8.3.0 "cc (Loongnix 8.3.0-6.lnd.vec.27) 8.3.0") | ||
18 | C linker for the host machine: cc ld.bfd 2.31.1-system | ||
19 | |||
20 | Fixes: ad812c3bd65 ("linux-user: Implement CPU-specific signal handler for loongarch64 hosts") | ||
21 | Reported-by: Song Gao <gaosong@loongson.cn> | ||
22 | Suggested-by: Song Gao <gaosong@loongson.cn> | ||
23 | Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
24 | Reviewed-by: WANG Xuerui <git@xen0n.name> | ||
25 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
26 | Message-Id: <20220104215027.2180972-1-f4bug@amsat.org> | ||
27 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
28 | --- | 6 | --- |
29 | linux-user/host/loongarch64/host-signal.h | 4 +--- | 7 | tcg/mips/tcg-target.c.inc | 4 +++- |
30 | 1 file changed, 1 insertion(+), 3 deletions(-) | 8 | 1 file changed, 3 insertions(+), 1 deletion(-) |
31 | 9 | ||
32 | diff --git a/linux-user/host/loongarch64/host-signal.h b/linux-user/host/loongarch64/host-signal.h | 10 | diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc |
33 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
34 | --- a/linux-user/host/loongarch64/host-signal.h | 12 | --- a/tcg/mips/tcg-target.c.inc |
35 | +++ b/linux-user/host/loongarch64/host-signal.h | 13 | +++ b/tcg/mips/tcg-target.c.inc |
36 | @@ -XXX,XX +XXX,XX @@ static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) | 14 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rs) |
37 | } | 15 | |
38 | break; | 16 | static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs) |
39 | case 0b001110: /* indexed, atomic, bounds-checking memory operations */ | 17 | { |
40 | - uint32_t sel = (insn >> 15) & 0b11111111111; | 18 | - tcg_out_ext32s(s, rd, rs); |
41 | - | 19 | + if (rd != rs) { |
42 | - switch (sel) { | 20 | + tcg_out_ext32s(s, rd, rs); |
43 | + switch ((insn >> 15) & 0b11111111111) { | 21 | + } |
44 | case 0b00000100000: /* stx.b */ | 22 | } |
45 | case 0b00000101000: /* stx.h */ | 23 | |
46 | case 0b00000110000: /* stx.w */ | 24 | static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs) |
47 | -- | 25 | -- |
48 | 2.25.1 | 26 | 2.34.1 |
49 | 27 | ||
50 | 28 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | This is common code in most qemu_{ld,st} slow paths, moving two | |
2 | registers when there may be overlap between sources and destinations. | ||
3 | At present, this is only used by 32-bit hosts for 64-bit data, | ||
4 | but will shortly be used for more than that. | ||
5 | |||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | --- | ||
9 | tcg/tcg.c | 69 ++++++++++++++++++++++++++++++++++++--- | ||
10 | tcg/arm/tcg-target.c.inc | 44 ++++++++++--------------- | ||
11 | tcg/i386/tcg-target.c.inc | 19 +++++------ | ||
12 | 3 files changed, 90 insertions(+), 42 deletions(-) | ||
13 | |||
14 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/tcg/tcg.c | ||
17 | +++ b/tcg/tcg.c | ||
18 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg); | ||
19 | static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg); | ||
20 | static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg); | ||
21 | static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long); | ||
22 | -static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2) | ||
23 | - __attribute__((unused)); | ||
24 | +static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2); | ||
25 | static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); | ||
26 | static void tcg_out_goto_tb(TCGContext *s, int which); | ||
27 | static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
28 | @@ -XXX,XX +XXX,XX @@ void tcg_raise_tb_overflow(TCGContext *s) | ||
29 | siglongjmp(s->jmp_trans, -2); | ||
30 | } | ||
31 | |||
32 | +typedef struct TCGMovExtend { | ||
33 | + TCGReg dst; | ||
34 | + TCGReg src; | ||
35 | + TCGType dst_type; | ||
36 | + TCGType src_type; | ||
37 | + MemOp src_ext; | ||
38 | +} TCGMovExtend; | ||
39 | + | ||
40 | /** | ||
41 | * tcg_out_movext -- move and extend | ||
42 | * @s: tcg context | ||
43 | @@ -XXX,XX +XXX,XX @@ void tcg_raise_tb_overflow(TCGContext *s) | ||
44 | * | ||
45 | * Move or extend @src into @dst, depending on @src_ext and the types. | ||
46 | */ | ||
47 | -static void __attribute__((unused)) | ||
48 | -tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst, | ||
49 | - TCGType src_type, MemOp src_ext, TCGReg src) | ||
50 | +static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst, | ||
51 | + TCGType src_type, MemOp src_ext, TCGReg src) | ||
52 | { | ||
53 | switch (src_ext) { | ||
54 | case MO_UB: | ||
55 | @@ -XXX,XX +XXX,XX @@ tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst, | ||
56 | } | ||
57 | } | ||
58 | |||
59 | +/* Minor variations on a theme, using a structure. */ | ||
60 | +static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i, | ||
61 | + TCGReg src) | ||
62 | +{ | ||
63 | + tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src); | ||
64 | +} | ||
65 | + | ||
66 | +static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i) | ||
67 | +{ | ||
68 | + tcg_out_movext1_new_src(s, i, i->src); | ||
69 | +} | ||
70 | + | ||
71 | +/** | ||
72 | + * tcg_out_movext2 -- move and extend two pair | ||
73 | + * @s: tcg context | ||
74 | + * @i1: first move description | ||
75 | + * @i2: second move description | ||
76 | + * @scratch: temporary register, or -1 for none | ||
77 | + * | ||
78 | + * As tcg_out_movext, for both @i1 and @i2, caring for overlap | ||
79 | + * between the sources and destinations. | ||
80 | + */ | ||
81 | + | ||
82 | +static void __attribute__((unused)) | ||
83 | +tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1, | ||
84 | + const TCGMovExtend *i2, int scratch) | ||
85 | +{ | ||
86 | + TCGReg src1 = i1->src; | ||
87 | + TCGReg src2 = i2->src; | ||
88 | + | ||
89 | + if (i1->dst != src2) { | ||
90 | + tcg_out_movext1(s, i1); | ||
91 | + tcg_out_movext1(s, i2); | ||
92 | + return; | ||
93 | + } | ||
94 | + if (i2->dst == src1) { | ||
95 | + TCGType src1_type = i1->src_type; | ||
96 | + TCGType src2_type = i2->src_type; | ||
97 | + | ||
98 | + if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) { | ||
99 | + /* The data is now in the correct registers, now extend. */ | ||
100 | + src1 = i2->src; | ||
101 | + src2 = i1->src; | ||
102 | + } else { | ||
103 | + tcg_debug_assert(scratch >= 0); | ||
104 | + tcg_out_mov(s, src1_type, scratch, src1); | ||
105 | + src1 = scratch; | ||
106 | + } | ||
107 | + } | ||
108 | + tcg_out_movext1_new_src(s, i2, src2); | ||
109 | + tcg_out_movext1_new_src(s, i1, src1); | ||
110 | +} | ||
111 | + | ||
112 | #define C_PFX1(P, A) P##A | ||
113 | #define C_PFX2(P, A, B) P##A##_##B | ||
114 | #define C_PFX3(P, A, B, C) P##A##_##B##_##C | ||
115 | diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc | ||
116 | index XXXXXXX..XXXXXXX 100644 | ||
117 | --- a/tcg/arm/tcg-target.c.inc | ||
118 | +++ b/tcg/arm/tcg-target.c.inc | ||
119 | @@ -XXX,XX +XXX,XX @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi, | ||
120 | |||
121 | static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
122 | { | ||
123 | - TCGReg argreg, datalo, datahi; | ||
124 | + TCGReg argreg; | ||
125 | MemOpIdx oi = lb->oi; | ||
126 | MemOp opc = get_memop(oi); | ||
127 | |||
128 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
129 | /* Use the canonical unsigned helpers and minimize icache usage. */ | ||
130 | tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]); | ||
131 | |||
132 | - datalo = lb->datalo_reg; | ||
133 | - datahi = lb->datahi_reg; | ||
134 | if ((opc & MO_SIZE) == MO_64) { | ||
135 | - if (datalo != TCG_REG_R1) { | ||
136 | - tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0); | ||
137 | - tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); | ||
138 | - } else if (datahi != TCG_REG_R0) { | ||
139 | - tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); | ||
140 | - tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0); | ||
141 | - } else { | ||
142 | - tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0); | ||
143 | - tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); | ||
144 | - tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP); | ||
145 | - } | ||
146 | + TCGMovExtend ext[2] = { | ||
147 | + { .dst = lb->datalo_reg, .dst_type = TCG_TYPE_I32, | ||
148 | + .src = TCG_REG_R0, .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, | ||
149 | + { .dst = lb->datahi_reg, .dst_type = TCG_TYPE_I32, | ||
150 | + .src = TCG_REG_R1, .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, | ||
151 | + }; | ||
152 | + tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP); | ||
153 | } else { | ||
154 | - tcg_out_movext(s, TCG_TYPE_I32, datalo, | ||
155 | + tcg_out_movext(s, TCG_TYPE_I32, lb->datalo_reg, | ||
156 | TCG_TYPE_I32, opc & MO_SSIZE, TCG_REG_R0); | ||
157 | } | ||
158 | |||
159 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) | ||
160 | |||
161 | if (TARGET_LONG_BITS == 64) { | ||
162 | /* 64-bit target address is aligned into R2:R3. */ | ||
163 | - if (l->addrhi_reg != TCG_REG_R2) { | ||
164 | - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg); | ||
165 | - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg); | ||
166 | - } else if (l->addrlo_reg != TCG_REG_R3) { | ||
167 | - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg); | ||
168 | - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg); | ||
169 | - } else { | ||
170 | - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, TCG_REG_R2); | ||
171 | - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, TCG_REG_R3); | ||
172 | - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, TCG_REG_R1); | ||
173 | - } | ||
174 | + TCGMovExtend ext[2] = { | ||
175 | + { .dst = TCG_REG_R2, .dst_type = TCG_TYPE_I32, | ||
176 | + .src = l->addrlo_reg, | ||
177 | + .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, | ||
178 | + { .dst = TCG_REG_R3, .dst_type = TCG_TYPE_I32, | ||
179 | + .src = l->addrhi_reg, | ||
180 | + .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, | ||
181 | + }; | ||
182 | + tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP); | ||
183 | } else { | ||
184 | tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg); | ||
185 | } | ||
186 | diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc | ||
187 | index XXXXXXX..XXXXXXX 100644 | ||
188 | --- a/tcg/i386/tcg-target.c.inc | ||
189 | +++ b/tcg/i386/tcg-target.c.inc | ||
190 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) | ||
191 | { | ||
192 | MemOpIdx oi = l->oi; | ||
193 | MemOp opc = get_memop(oi); | ||
194 | - TCGReg data_reg; | ||
195 | tcg_insn_unit **label_ptr = &l->label_ptr[0]; | ||
196 | |||
197 | /* resolve label address */ | ||
198 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) | ||
199 | |||
200 | tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); | ||
201 | |||
202 | - data_reg = l->datalo_reg; | ||
203 | if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { | ||
204 | - if (data_reg == TCG_REG_EDX) { | ||
205 | - /* xchg %edx, %eax */ | ||
206 | - tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0); | ||
207 | - tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX); | ||
208 | - } else { | ||
209 | - tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); | ||
210 | - tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX); | ||
211 | - } | ||
212 | + TCGMovExtend ext[2] = { | ||
213 | + { .dst = l->datalo_reg, .dst_type = TCG_TYPE_I32, | ||
214 | + .src = TCG_REG_EAX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, | ||
215 | + { .dst = l->datahi_reg, .dst_type = TCG_TYPE_I32, | ||
216 | + .src = TCG_REG_EDX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, | ||
217 | + }; | ||
218 | + tcg_out_movext2(s, &ext[0], &ext[1], -1); | ||
219 | } else { | ||
220 | - tcg_out_movext(s, l->type, data_reg, | ||
221 | + tcg_out_movext(s, l->type, l->datalo_reg, | ||
222 | TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_EAX); | ||
223 | } | ||
224 | |||
225 | -- | ||
226 | 2.34.1 | ||
227 | |||
228 | diff view generated by jsdifflib |