1 | The following changes since commit e18e5501d8ac692d32657a3e1ef545b14e72b730: | 1 | The following changes since commit 7c18f2d663521f1b31b821a13358ce38075eaf7d: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/dgilbert-gitlab/tags/pull-virtiofs-20200210' into staging (2020-02-10 18:09:14 +0000) | 3 | Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging (2023-04-29 23:07:17 +0100) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://github.com/rth7680/qemu.git tags/pull-tcg-20200212 | 7 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230502 |
8 | 8 | ||
9 | for you to fetch changes up to 2445971604c1cfd3ec484457159f4ac300fb04d2: | 9 | for you to fetch changes up to bdc7fba1c5a29ae218b45353daac9308fe1aae82: |
10 | 10 | ||
11 | tcg: Add tcg_gen_gvec_5_ptr (2020-02-12 14:58:36 -0800) | 11 | tcg: Introduce tcg_out_movext2 (2023-05-02 12:15:41 +0100) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Fix breakpoint invalidation. | 14 | Misc tcg-related patch queue. |
15 | Add support for tcg helpers with 7 arguments. | ||
16 | Add support for gvec helpers with 5 arguments. | ||
17 | 15 | ||
18 | ---------------------------------------------------------------- | 16 | ---------------------------------------------------------------- |
19 | Max Filippov (1): | 17 | Dickon Hood (1): |
20 | exec: flush CPU TB cache in breakpoint_invalidate | 18 | qemu/bitops.h: Limit rotate amounts |
21 | 19 | ||
22 | Richard Henderson (1): | 20 | Kiran Ostrolenk (1): |
23 | tcg: Add tcg_gen_gvec_5_ptr | 21 | qemu/host-utils.h: Add clz and ctz functions for lower-bit integers |
24 | 22 | ||
25 | Taylor Simpson (1): | 23 | Nazar Kazakov (2): |
26 | tcg: Add support for a helper with 7 arguments | 24 | tcg: Add tcg_gen_gvec_andcs |
25 | tcg: Add tcg_gen_gvec_rotrs | ||
27 | 26 | ||
28 | include/exec/helper-gen.h | 13 +++++++++++++ | 27 | Richard Henderson (7): |
29 | include/exec/helper-head.h | 2 ++ | 28 | softmmu: Tidy dirtylimit_dirty_ring_full_time |
30 | include/exec/helper-proto.h | 6 ++++++ | 29 | qemu/int128: Re-shuffle Int128Alias members |
31 | include/exec/helper-tcg.h | 7 +++++++ | 30 | migration/xbzrle: Use __attribute__((target)) for avx512 |
32 | include/tcg/tcg-op-gvec.h | 7 +++++++ | 31 | accel/tcg: Add cpu_ld*_code_mmu |
33 | exec.c | 15 +++++++-------- | 32 | tcg/loongarch64: Conditionalize tcg_out_exts_i32_i64 |
34 | tcg/tcg-op-gvec.c | 32 ++++++++++++++++++++++++++++++++ | 33 | tcg/mips: Conditionalize tcg_out_exts_i32_i64 |
35 | 7 files changed, 74 insertions(+), 8 deletions(-) | 34 | tcg: Introduce tcg_out_movext2 |
36 | 35 | ||
36 | Weiwei Li (1): | ||
37 | accel/tcg: Uncache the host address for instruction fetch when tlb size < 1 | ||
38 | |||
39 | meson.build | 5 +-- | ||
40 | accel/tcg/tcg-runtime.h | 1 + | ||
41 | include/exec/cpu_ldst.h | 9 ++++++ | ||
42 | include/qemu/bitops.h | 24 +++++++++----- | ||
43 | include/qemu/host-utils.h | 54 +++++++++++++++++++++++++++++++ | ||
44 | include/qemu/int128.h | 4 +-- | ||
45 | include/tcg/tcg-op-gvec.h | 4 +++ | ||
46 | accel/tcg/cputlb.c | 53 ++++++++++++++++++++++++++++++ | ||
47 | accel/tcg/tcg-runtime-gvec.c | 11 +++++++ | ||
48 | accel/tcg/user-exec.c | 58 +++++++++++++++++++++++++++++++++ | ||
49 | migration/xbzrle.c | 9 +++--- | ||
50 | softmmu/dirtylimit.c | 15 ++++++--- | ||
51 | tcg/tcg-op-gvec.c | 28 ++++++++++++++++ | ||
52 | tcg/tcg.c | 69 +++++++++++++++++++++++++++++++++++++--- | ||
53 | tcg/arm/tcg-target.c.inc | 44 +++++++++++-------------- | ||
54 | tcg/i386/tcg-target.c.inc | 19 +++++------ | ||
55 | tcg/loongarch64/tcg-target.c.inc | 4 ++- | ||
56 | tcg/mips/tcg-target.c.inc | 4 ++- | ||
57 | 18 files changed, 347 insertions(+), 68 deletions(-) | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Drop inline marker: let compiler decide. | ||
1 | 2 | ||
3 | Change return type to uint64_t: this matches the computation in the | ||
4 | return statement and the local variable assignment in the caller. | ||
5 | |||
6 | Rename local to dirty_ring_size_MB to fix typo. | ||
7 | Simplify conversion to MiB via qemu_target_page_bits and right shift. | ||
8 | |||
9 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
10 | Reviewed-by: Thomas Huth <thuth@redhat.com> | ||
11 | Reviewed-by: Juan Quintela <quintela@redhat.com> | ||
12 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | --- | ||
14 | softmmu/dirtylimit.c | 15 ++++++++++----- | ||
15 | 1 file changed, 10 insertions(+), 5 deletions(-) | ||
16 | |||
17 | diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/softmmu/dirtylimit.c | ||
20 | +++ b/softmmu/dirtylimit.c | ||
21 | @@ -XXX,XX +XXX,XX @@ bool dirtylimit_vcpu_index_valid(int cpu_index) | ||
22 | cpu_index >= ms->smp.max_cpus); | ||
23 | } | ||
24 | |||
25 | -static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate) | ||
26 | +static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate) | ||
27 | { | ||
28 | static uint64_t max_dirtyrate; | ||
29 | - uint32_t dirty_ring_size = kvm_dirty_ring_size(); | ||
30 | - uint64_t dirty_ring_size_meory_MB = | ||
31 | - dirty_ring_size * qemu_target_page_size() >> 20; | ||
32 | + unsigned target_page_bits = qemu_target_page_bits(); | ||
33 | + uint64_t dirty_ring_size_MB; | ||
34 | + | ||
35 | + /* So far, the largest (non-huge) page size is 64k, i.e. 16 bits. */ | ||
36 | + assert(target_page_bits < 20); | ||
37 | + | ||
38 | + /* Convert ring size (pages) to MiB (2**20). */ | ||
39 | + dirty_ring_size_MB = kvm_dirty_ring_size() >> (20 - target_page_bits); | ||
40 | |||
41 | if (max_dirtyrate < dirtyrate) { | ||
42 | max_dirtyrate = dirtyrate; | ||
43 | } | ||
44 | |||
45 | - return dirty_ring_size_meory_MB * 1000000 / max_dirtyrate; | ||
46 | + return dirty_ring_size_MB * 1000000 / max_dirtyrate; | ||
47 | } | ||
48 | |||
49 | static inline bool dirtylimit_done(uint64_t quota, | ||
50 | -- | ||
51 | 2.34.1 | ||
52 | |||
53 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Weiwei Li <liweiwei@iscas.ac.cn> | ||
1 | 2 | ||
3 | When PMP entry overlap part of the page, we'll set the tlb_size to 1, which | ||
4 | will make the address in tlb entry set with TLB_INVALID_MASK, and the next | ||
5 | access will again go through tlb_fill.However, this way will not work in | ||
6 | tb_gen_code() => get_page_addr_code_hostp(): the TLB host address will be | ||
7 | cached, and the following instructions can use this host address directly | ||
8 | which may lead to the bypass of PMP related check. | ||
9 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1542. | ||
10 | |||
11 | Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn> | ||
12 | Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn> | ||
13 | Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com> | ||
14 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
15 | Message-Id: <20230422130329.23555-6-liweiwei@iscas.ac.cn> | ||
16 | --- | ||
17 | accel/tcg/cputlb.c | 5 +++++ | ||
18 | 1 file changed, 5 insertions(+) | ||
19 | |||
20 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/accel/tcg/cputlb.c | ||
23 | +++ b/accel/tcg/cputlb.c | ||
24 | @@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr, | ||
25 | if (p == NULL) { | ||
26 | return -1; | ||
27 | } | ||
28 | + | ||
29 | + if (full->lg_page_size < TARGET_PAGE_BITS) { | ||
30 | + return -1; | ||
31 | + } | ||
32 | + | ||
33 | if (hostp) { | ||
34 | *hostp = p; | ||
35 | } | ||
36 | -- | ||
37 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Dickon Hood <dickon.hood@codethink.co.uk> | ||
1 | 2 | ||
3 | Rotates have been fixed up to only allow for reasonable rotate amounts | ||
4 | (ie, no rotates >7 on an 8b value etc.) This fixes a problem with riscv | ||
5 | vector rotate instructions. | ||
6 | |||
7 | Signed-off-by: Dickon Hood <dickon.hood@codethink.co.uk> | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Message-Id: <20230428144757.57530-9-lawrence.hunter@codethink.co.uk> | ||
10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
11 | --- | ||
12 | include/qemu/bitops.h | 24 ++++++++++++++++-------- | ||
13 | 1 file changed, 16 insertions(+), 8 deletions(-) | ||
14 | |||
15 | diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/include/qemu/bitops.h | ||
18 | +++ b/include/qemu/bitops.h | ||
19 | @@ -XXX,XX +XXX,XX @@ static inline unsigned long find_first_zero_bit(const unsigned long *addr, | ||
20 | */ | ||
21 | static inline uint8_t rol8(uint8_t word, unsigned int shift) | ||
22 | { | ||
23 | - return (word << shift) | (word >> ((8 - shift) & 7)); | ||
24 | + shift &= 7; | ||
25 | + return (word << shift) | (word >> (8 - shift)); | ||
26 | } | ||
27 | |||
28 | /** | ||
29 | @@ -XXX,XX +XXX,XX @@ static inline uint8_t rol8(uint8_t word, unsigned int shift) | ||
30 | */ | ||
31 | static inline uint8_t ror8(uint8_t word, unsigned int shift) | ||
32 | { | ||
33 | - return (word >> shift) | (word << ((8 - shift) & 7)); | ||
34 | + shift &= 7; | ||
35 | + return (word >> shift) | (word << (8 - shift)); | ||
36 | } | ||
37 | |||
38 | /** | ||
39 | @@ -XXX,XX +XXX,XX @@ static inline uint8_t ror8(uint8_t word, unsigned int shift) | ||
40 | */ | ||
41 | static inline uint16_t rol16(uint16_t word, unsigned int shift) | ||
42 | { | ||
43 | - return (word << shift) | (word >> ((16 - shift) & 15)); | ||
44 | + shift &= 15; | ||
45 | + return (word << shift) | (word >> (16 - shift)); | ||
46 | } | ||
47 | |||
48 | /** | ||
49 | @@ -XXX,XX +XXX,XX @@ static inline uint16_t rol16(uint16_t word, unsigned int shift) | ||
50 | */ | ||
51 | static inline uint16_t ror16(uint16_t word, unsigned int shift) | ||
52 | { | ||
53 | - return (word >> shift) | (word << ((16 - shift) & 15)); | ||
54 | + shift &= 15; | ||
55 | + return (word >> shift) | (word << (16 - shift)); | ||
56 | } | ||
57 | |||
58 | /** | ||
59 | @@ -XXX,XX +XXX,XX @@ static inline uint16_t ror16(uint16_t word, unsigned int shift) | ||
60 | */ | ||
61 | static inline uint32_t rol32(uint32_t word, unsigned int shift) | ||
62 | { | ||
63 | - return (word << shift) | (word >> ((32 - shift) & 31)); | ||
64 | + shift &= 31; | ||
65 | + return (word << shift) | (word >> (32 - shift)); | ||
66 | } | ||
67 | |||
68 | /** | ||
69 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t rol32(uint32_t word, unsigned int shift) | ||
70 | */ | ||
71 | static inline uint32_t ror32(uint32_t word, unsigned int shift) | ||
72 | { | ||
73 | - return (word >> shift) | (word << ((32 - shift) & 31)); | ||
74 | + shift &= 31; | ||
75 | + return (word >> shift) | (word << (32 - shift)); | ||
76 | } | ||
77 | |||
78 | /** | ||
79 | @@ -XXX,XX +XXX,XX @@ static inline uint32_t ror32(uint32_t word, unsigned int shift) | ||
80 | */ | ||
81 | static inline uint64_t rol64(uint64_t word, unsigned int shift) | ||
82 | { | ||
83 | - return (word << shift) | (word >> ((64 - shift) & 63)); | ||
84 | + shift &= 63; | ||
85 | + return (word << shift) | (word >> (64 - shift)); | ||
86 | } | ||
87 | |||
88 | /** | ||
89 | @@ -XXX,XX +XXX,XX @@ static inline uint64_t rol64(uint64_t word, unsigned int shift) | ||
90 | */ | ||
91 | static inline uint64_t ror64(uint64_t word, unsigned int shift) | ||
92 | { | ||
93 | - return (word >> shift) | (word << ((64 - shift) & 63)); | ||
94 | + shift &= 63; | ||
95 | + return (word >> shift) | (word << (64 - shift)); | ||
96 | } | ||
97 | |||
98 | /** | ||
99 | -- | ||
100 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> | ||
1 | 2 | ||
3 | This is for use in the RISC-V vclz and vctz instructions (implemented in | ||
4 | proceeding commit). | ||
5 | |||
6 | Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk> | ||
7 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | Message-Id: <20230428144757.57530-11-lawrence.hunter@codethink.co.uk> | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | ||
11 | include/qemu/host-utils.h | 54 +++++++++++++++++++++++++++++++++++++++ | ||
12 | 1 file changed, 54 insertions(+) | ||
13 | |||
14 | diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/include/qemu/host-utils.h | ||
17 | +++ b/include/qemu/host-utils.h | ||
18 | @@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) | ||
19 | } | ||
20 | #endif | ||
21 | |||
22 | +/** | ||
23 | + * clz8 - count leading zeros in a 8-bit value. | ||
24 | + * @val: The value to search | ||
25 | + * | ||
26 | + * Returns 8 if the value is zero. Note that the GCC builtin is | ||
27 | + * undefined if the value is zero. | ||
28 | + * | ||
29 | + * Note that the GCC builtin will upcast its argument to an `unsigned int` | ||
30 | + * so this function subtracts off the number of prepended zeroes. | ||
31 | + */ | ||
32 | +static inline int clz8(uint8_t val) | ||
33 | +{ | ||
34 | + return val ? __builtin_clz(val) - 24 : 8; | ||
35 | +} | ||
36 | + | ||
37 | +/** | ||
38 | + * clz16 - count leading zeros in a 16-bit value. | ||
39 | + * @val: The value to search | ||
40 | + * | ||
41 | + * Returns 16 if the value is zero. Note that the GCC builtin is | ||
42 | + * undefined if the value is zero. | ||
43 | + * | ||
44 | + * Note that the GCC builtin will upcast its argument to an `unsigned int` | ||
45 | + * so this function subtracts off the number of prepended zeroes. | ||
46 | + */ | ||
47 | +static inline int clz16(uint16_t val) | ||
48 | +{ | ||
49 | + return val ? __builtin_clz(val) - 16 : 16; | ||
50 | +} | ||
51 | + | ||
52 | /** | ||
53 | * clz32 - count leading zeros in a 32-bit value. | ||
54 | * @val: The value to search | ||
55 | @@ -XXX,XX +XXX,XX @@ static inline int clo64(uint64_t val) | ||
56 | return clz64(~val); | ||
57 | } | ||
58 | |||
59 | +/** | ||
60 | + * ctz8 - count trailing zeros in a 8-bit value. | ||
61 | + * @val: The value to search | ||
62 | + * | ||
63 | + * Returns 8 if the value is zero. Note that the GCC builtin is | ||
64 | + * undefined if the value is zero. | ||
65 | + */ | ||
66 | +static inline int ctz8(uint8_t val) | ||
67 | +{ | ||
68 | + return val ? __builtin_ctz(val) : 8; | ||
69 | +} | ||
70 | + | ||
71 | +/** | ||
72 | + * ctz16 - count trailing zeros in a 16-bit value. | ||
73 | + * @val: The value to search | ||
74 | + * | ||
75 | + * Returns 16 if the value is zero. Note that the GCC builtin is | ||
76 | + * undefined if the value is zero. | ||
77 | + */ | ||
78 | +static inline int ctz16(uint16_t val) | ||
79 | +{ | ||
80 | + return val ? __builtin_ctz(val) : 16; | ||
81 | +} | ||
82 | + | ||
83 | /** | ||
84 | * ctz32 - count trailing zeros in a 32-bit value. | ||
85 | * @val: The value to search | ||
86 | -- | ||
87 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
1 | 2 | ||
3 | Add tcg expander and helper functions for and-compliment | ||
4 | vector with scalar operand. | ||
5 | |||
6 | Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
7 | Message-Id: <20230428144757.57530-10-lawrence.hunter@codethink.co.uk> | ||
8 | [rth: Split out of larger patch.] | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
10 | --- | ||
11 | accel/tcg/tcg-runtime.h | 1 + | ||
12 | include/tcg/tcg-op-gvec.h | 2 ++ | ||
13 | accel/tcg/tcg-runtime-gvec.c | 11 +++++++++++ | ||
14 | tcg/tcg-op-gvec.c | 17 +++++++++++++++++ | ||
15 | 4 files changed, 31 insertions(+) | ||
16 | |||
17 | diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/accel/tcg/tcg-runtime.h | ||
20 | +++ b/accel/tcg/tcg-runtime.h | ||
21 | @@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_nor, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
22 | DEF_HELPER_FLAGS_4(gvec_eqv, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) | ||
23 | |||
24 | DEF_HELPER_FLAGS_4(gvec_ands, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
25 | +DEF_HELPER_FLAGS_4(gvec_andcs, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
26 | DEF_HELPER_FLAGS_4(gvec_xors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
27 | DEF_HELPER_FLAGS_4(gvec_ors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32) | ||
28 | |||
29 | diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/include/tcg/tcg-op-gvec.h | ||
32 | +++ b/include/tcg/tcg-op-gvec.h | ||
33 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_ori(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
34 | |||
35 | void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
36 | TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); | ||
37 | +void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
38 | + TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); | ||
39 | void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
40 | TCGv_i64 c, uint32_t oprsz, uint32_t maxsz); | ||
41 | void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
42 | diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/accel/tcg/tcg-runtime-gvec.c | ||
45 | +++ b/accel/tcg/tcg-runtime-gvec.c | ||
46 | @@ -XXX,XX +XXX,XX @@ void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc) | ||
47 | clear_high(d, oprsz, desc); | ||
48 | } | ||
49 | |||
50 | +void HELPER(gvec_andcs)(void *d, void *a, uint64_t b, uint32_t desc) | ||
51 | +{ | ||
52 | + intptr_t oprsz = simd_oprsz(desc); | ||
53 | + intptr_t i; | ||
54 | + | ||
55 | + for (i = 0; i < oprsz; i += sizeof(uint64_t)) { | ||
56 | + *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & ~b; | ||
57 | + } | ||
58 | + clear_high(d, oprsz, desc); | ||
59 | +} | ||
60 | + | ||
61 | void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc) | ||
62 | { | ||
63 | intptr_t oprsz = simd_oprsz(desc); | ||
64 | diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c | ||
65 | index XXXXXXX..XXXXXXX 100644 | ||
66 | --- a/tcg/tcg-op-gvec.c | ||
67 | +++ b/tcg/tcg-op-gvec.c | ||
68 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_andi(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
69 | tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ands); | ||
70 | } | ||
71 | |||
72 | +void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
73 | + TCGv_i64 c, uint32_t oprsz, uint32_t maxsz) | ||
74 | +{ | ||
75 | + static GVecGen2s g = { | ||
76 | + .fni8 = tcg_gen_andc_i64, | ||
77 | + .fniv = tcg_gen_andc_vec, | ||
78 | + .fno = gen_helper_gvec_andcs, | ||
79 | + .prefer_i64 = TCG_TARGET_REG_BITS == 64, | ||
80 | + .vece = MO_64 | ||
81 | + }; | ||
82 | + | ||
83 | + TCGv_i64 tmp = tcg_temp_ebb_new_i64(); | ||
84 | + tcg_gen_dup_i64(vece, tmp, c); | ||
85 | + tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &g); | ||
86 | + tcg_temp_free_i64(tmp); | ||
87 | +} | ||
88 | + | ||
89 | static const GVecGen2s gop_xors = { | ||
90 | .fni8 = tcg_gen_xor_i64, | ||
91 | .fniv = tcg_gen_xor_vec, | ||
92 | -- | ||
93 | 2.34.1 | diff view generated by jsdifflib |
1 | Extend the vector generator infrastructure to handle | 1 | From: Nazar Kazakov <nazar.kazakov@codethink.co.uk> |
---|---|---|---|
2 | 5 vector arguments. | ||
3 | 2 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | 3 | Add tcg expander and helper functions for rotate right |
5 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | 4 | vector with scalar operand. |
6 | Reviewed-by: Taylor Simpson <tsimpson@quicinc.com> | 5 | |
6 | Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk> | ||
7 | Message-Id: <20230428144757.57530-10-lawrence.hunter@codethink.co.uk> | ||
8 | [rth: Split out of larger patch; mask rotation count.] | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
8 | --- | 10 | --- |
9 | include/tcg/tcg-op-gvec.h | 7 +++++++ | 11 | include/tcg/tcg-op-gvec.h | 2 ++ |
10 | tcg/tcg-op-gvec.c | 32 ++++++++++++++++++++++++++++++++ | 12 | tcg/tcg-op-gvec.c | 11 +++++++++++ |
11 | 2 files changed, 39 insertions(+) | 13 | 2 files changed, 13 insertions(+) |
12 | 14 | ||
13 | diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h | 15 | diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h |
14 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/include/tcg/tcg-op-gvec.h | 17 | --- a/include/tcg/tcg-op-gvec.h |
16 | +++ b/include/tcg/tcg-op-gvec.h | 18 | +++ b/include/tcg/tcg-op-gvec.h |
17 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs, | 19 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs, |
18 | uint32_t maxsz, int32_t data, | 20 | TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); |
19 | gen_helper_gvec_4_ptr *fn); | 21 | void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs, |
20 | 22 | TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); | |
21 | +typedef void gen_helper_gvec_5_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, | 23 | +void tcg_gen_gvec_rotrs(unsigned vece, uint32_t dofs, uint32_t aofs, |
22 | + TCGv_ptr, TCGv_ptr, TCGv_i32); | 24 | + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz); |
23 | +void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs, | 25 | |
24 | + uint32_t cofs, uint32_t eofs, TCGv_ptr ptr, | 26 | /* |
25 | + uint32_t oprsz, uint32_t maxsz, int32_t data, | 27 | * Perform vector shift by vector element, modulo the element size. |
26 | + gen_helper_gvec_5_ptr *fn); | ||
27 | + | ||
28 | /* Expand a gvec operation. Either inline or out-of-line depending on | ||
29 | the actual vector size and the operations supported by the host. */ | ||
30 | typedef struct { | ||
31 | diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c | 28 | diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c |
32 | index XXXXXXX..XXXXXXX 100644 | 29 | index XXXXXXX..XXXXXXX 100644 |
33 | --- a/tcg/tcg-op-gvec.c | 30 | --- a/tcg/tcg-op-gvec.c |
34 | +++ b/tcg/tcg-op-gvec.c | 31 | +++ b/tcg/tcg-op-gvec.c |
35 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs, | 32 | @@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs, |
36 | tcg_temp_free_i32(desc); | 33 | do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g); |
37 | } | 34 | } |
38 | 35 | ||
39 | +/* Generate a call to a gvec-style helper with five vector operands | 36 | +void tcg_gen_gvec_rotrs(unsigned vece, uint32_t dofs, uint32_t aofs, |
40 | + and an extra pointer operand. */ | 37 | + TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz) |
41 | +void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs, | ||
42 | + uint32_t cofs, uint32_t eofs, TCGv_ptr ptr, | ||
43 | + uint32_t oprsz, uint32_t maxsz, int32_t data, | ||
44 | + gen_helper_gvec_5_ptr *fn) | ||
45 | +{ | 38 | +{ |
46 | + TCGv_ptr a0, a1, a2, a3, a4; | 39 | + TCGv_i32 tmp = tcg_temp_ebb_new_i32(); |
47 | + TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data)); | ||
48 | + | 40 | + |
49 | + a0 = tcg_temp_new_ptr(); | 41 | + tcg_gen_neg_i32(tmp, shift); |
50 | + a1 = tcg_temp_new_ptr(); | 42 | + tcg_gen_andi_i32(tmp, tmp, (8 << vece) - 1); |
51 | + a2 = tcg_temp_new_ptr(); | 43 | + tcg_gen_gvec_rotls(vece, dofs, aofs, tmp, oprsz, maxsz); |
52 | + a3 = tcg_temp_new_ptr(); | 44 | + tcg_temp_free_i32(tmp); |
53 | + a4 = tcg_temp_new_ptr(); | ||
54 | + | ||
55 | + tcg_gen_addi_ptr(a0, cpu_env, dofs); | ||
56 | + tcg_gen_addi_ptr(a1, cpu_env, aofs); | ||
57 | + tcg_gen_addi_ptr(a2, cpu_env, bofs); | ||
58 | + tcg_gen_addi_ptr(a3, cpu_env, cofs); | ||
59 | + tcg_gen_addi_ptr(a4, cpu_env, eofs); | ||
60 | + | ||
61 | + fn(a0, a1, a2, a3, a4, ptr, desc); | ||
62 | + | ||
63 | + tcg_temp_free_ptr(a0); | ||
64 | + tcg_temp_free_ptr(a1); | ||
65 | + tcg_temp_free_ptr(a2); | ||
66 | + tcg_temp_free_ptr(a3); | ||
67 | + tcg_temp_free_ptr(a4); | ||
68 | + tcg_temp_free_i32(desc); | ||
69 | +} | 45 | +} |
70 | + | 46 | + |
71 | /* Return true if we want to implement something of OPRSZ bytes | 47 | /* |
72 | in units of LNSZ. This limits the expansion of inline code. */ | 48 | * Expand D = A << (B % element bits) |
73 | static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz) | 49 | * |
74 | -- | 50 | -- |
75 | 2.20.1 | 51 | 2.34.1 |
76 | |||
77 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Clang 14, with --enable-tcg-interpreter errors with | ||
1 | 2 | ||
3 | include/qemu/int128.h:487:16: error: alignment of field 'i' (128 bits) | ||
4 | does not match the alignment of the first field in transparent union; | ||
5 | transparent_union attribute ignored [-Werror,-Wignored-attributes] | ||
6 | __int128_t i; | ||
7 | ^ | ||
8 | include/qemu/int128.h:486:12: note: alignment of first field is 64 bits | ||
9 | Int128 s; | ||
10 | ^ | ||
11 | 1 error generated. | ||
12 | |||
13 | By placing the __uint128_t member first, this is avoided. | ||
14 | |||
15 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
16 | Reviewed-by: Alex Bennée <alex.bennee@linaro.org> | ||
17 | Message-Id: <20230501204625.277361-1-richard.henderson@linaro.org> | ||
18 | --- | ||
19 | include/qemu/int128.h | 4 ++-- | ||
20 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
21 | |||
22 | diff --git a/include/qemu/int128.h b/include/qemu/int128.h | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/include/qemu/int128.h | ||
25 | +++ b/include/qemu/int128.h | ||
26 | @@ -XXX,XX +XXX,XX @@ static inline void bswap128s(Int128 *s) | ||
27 | */ | ||
28 | #ifdef CONFIG_INT128 | ||
29 | typedef union { | ||
30 | - Int128 s; | ||
31 | - __int128_t i; | ||
32 | __uint128_t u; | ||
33 | + __int128_t i; | ||
34 | + Int128 s; | ||
35 | } Int128Alias __attribute__((transparent_union)); | ||
36 | #else | ||
37 | typedef Int128 Int128Alias; | ||
38 | -- | ||
39 | 2.34.1 | ||
40 | |||
41 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Use the attribute, which is supported by clang, instead of | ||
2 | the #pragma, which is not supported and, for some reason, | ||
3 | also not detected by the meson probe, so we fail by -Werror. | ||
1 | 4 | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Reviewed-by: Juan Quintela <quintela@redhat.com> | ||
7 | Message-Id: <20230501210555.289806-1-richard.henderson@linaro.org> | ||
8 | --- | ||
9 | meson.build | 5 +---- | ||
10 | migration/xbzrle.c | 9 ++++----- | ||
11 | 2 files changed, 5 insertions(+), 9 deletions(-) | ||
12 | |||
13 | diff --git a/meson.build b/meson.build | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/meson.build | ||
16 | +++ b/meson.build | ||
17 | @@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \ | ||
18 | config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \ | ||
19 | .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512BW') \ | ||
20 | .require(cc.links(''' | ||
21 | - #pragma GCC push_options | ||
22 | - #pragma GCC target("avx512bw") | ||
23 | #include <cpuid.h> | ||
24 | #include <immintrin.h> | ||
25 | - static int bar(void *a) { | ||
26 | - | ||
27 | + static int __attribute__((target("avx512bw"))) bar(void *a) { | ||
28 | __m512i *x = a; | ||
29 | __m512i res= _mm512_abs_epi8(*x); | ||
30 | return res[1]; | ||
31 | diff --git a/migration/xbzrle.c b/migration/xbzrle.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/migration/xbzrle.c | ||
34 | +++ b/migration/xbzrle.c | ||
35 | @@ -XXX,XX +XXX,XX @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen) | ||
36 | } | ||
37 | |||
38 | #if defined(CONFIG_AVX512BW_OPT) | ||
39 | -#pragma GCC push_options | ||
40 | -#pragma GCC target("avx512bw") | ||
41 | #include <immintrin.h> | ||
42 | -int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen, | ||
43 | - uint8_t *dst, int dlen) | ||
44 | + | ||
45 | +int __attribute__((target("avx512bw"))) | ||
46 | +xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen, | ||
47 | + uint8_t *dst, int dlen) | ||
48 | { | ||
49 | uint32_t zrun_len = 0, nzrun_len = 0; | ||
50 | int d = 0, i = 0, num = 0; | ||
51 | @@ -XXX,XX +XXX,XX @@ int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen, | ||
52 | } | ||
53 | return d; | ||
54 | } | ||
55 | -#pragma GCC pop_options | ||
56 | #endif | ||
57 | -- | ||
58 | 2.34.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | At least RISC-V has the need to be able to perform a read | ||
2 | using execute permissions, outside of translation. | ||
3 | Add helpers to facilitate this. | ||
1 | 4 | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | Acked-by: Alistair Francis <alistair.francis@wdc.com> | ||
7 | Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn> | ||
8 | Tested-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> | ||
9 | Message-Id: <20230325105429.1142530-9-richard.henderson@linaro.org> | ||
10 | Message-Id: <20230412114333.118895-9-richard.henderson@linaro.org> | ||
11 | --- | ||
12 | include/exec/cpu_ldst.h | 9 +++++++ | ||
13 | accel/tcg/cputlb.c | 48 ++++++++++++++++++++++++++++++++++ | ||
14 | accel/tcg/user-exec.c | 58 +++++++++++++++++++++++++++++++++++++++++ | ||
15 | 3 files changed, 115 insertions(+) | ||
16 | |||
17 | diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/include/exec/cpu_ldst.h | ||
20 | +++ b/include/exec/cpu_ldst.h | ||
21 | @@ -XXX,XX +XXX,XX @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, | ||
22 | # define cpu_stq_mmu cpu_stq_le_mmu | ||
23 | #endif | ||
24 | |||
25 | +uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr, | ||
26 | + MemOpIdx oi, uintptr_t ra); | ||
27 | +uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr, | ||
28 | + MemOpIdx oi, uintptr_t ra); | ||
29 | +uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr, | ||
30 | + MemOpIdx oi, uintptr_t ra); | ||
31 | +uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr, | ||
32 | + MemOpIdx oi, uintptr_t ra); | ||
33 | + | ||
34 | uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr); | ||
35 | uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr); | ||
36 | uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr); | ||
37 | diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/accel/tcg/cputlb.c | ||
40 | +++ b/accel/tcg/cputlb.c | ||
41 | @@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr) | ||
42 | MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true)); | ||
43 | return full_ldq_code(env, addr, oi, 0); | ||
44 | } | ||
45 | + | ||
46 | +uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr, | ||
47 | + MemOpIdx oi, uintptr_t retaddr) | ||
48 | +{ | ||
49 | + return full_ldub_code(env, addr, oi, retaddr); | ||
50 | +} | ||
51 | + | ||
52 | +uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr, | ||
53 | + MemOpIdx oi, uintptr_t retaddr) | ||
54 | +{ | ||
55 | + MemOp mop = get_memop(oi); | ||
56 | + int idx = get_mmuidx(oi); | ||
57 | + uint16_t ret; | ||
58 | + | ||
59 | + ret = full_lduw_code(env, addr, make_memop_idx(MO_TEUW, idx), retaddr); | ||
60 | + if ((mop & MO_BSWAP) != MO_TE) { | ||
61 | + ret = bswap16(ret); | ||
62 | + } | ||
63 | + return ret; | ||
64 | +} | ||
65 | + | ||
66 | +uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr, | ||
67 | + MemOpIdx oi, uintptr_t retaddr) | ||
68 | +{ | ||
69 | + MemOp mop = get_memop(oi); | ||
70 | + int idx = get_mmuidx(oi); | ||
71 | + uint32_t ret; | ||
72 | + | ||
73 | + ret = full_ldl_code(env, addr, make_memop_idx(MO_TEUL, idx), retaddr); | ||
74 | + if ((mop & MO_BSWAP) != MO_TE) { | ||
75 | + ret = bswap32(ret); | ||
76 | + } | ||
77 | + return ret; | ||
78 | +} | ||
79 | + | ||
80 | +uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr, | ||
81 | + MemOpIdx oi, uintptr_t retaddr) | ||
82 | +{ | ||
83 | + MemOp mop = get_memop(oi); | ||
84 | + int idx = get_mmuidx(oi); | ||
85 | + uint64_t ret; | ||
86 | + | ||
87 | + ret = full_ldq_code(env, addr, make_memop_idx(MO_TEUQ, idx), retaddr); | ||
88 | + if ((mop & MO_BSWAP) != MO_TE) { | ||
89 | + ret = bswap64(ret); | ||
90 | + } | ||
91 | + return ret; | ||
92 | +} | ||
93 | diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c | ||
94 | index XXXXXXX..XXXXXXX 100644 | ||
95 | --- a/accel/tcg/user-exec.c | ||
96 | +++ b/accel/tcg/user-exec.c | ||
97 | @@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr) | ||
98 | return ret; | ||
99 | } | ||
100 | |||
101 | +uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr, | ||
102 | + MemOpIdx oi, uintptr_t ra) | ||
103 | +{ | ||
104 | + void *haddr; | ||
105 | + uint8_t ret; | ||
106 | + | ||
107 | + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH); | ||
108 | + ret = ldub_p(haddr); | ||
109 | + clear_helper_retaddr(); | ||
110 | + return ret; | ||
111 | +} | ||
112 | + | ||
113 | +uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr, | ||
114 | + MemOpIdx oi, uintptr_t ra) | ||
115 | +{ | ||
116 | + void *haddr; | ||
117 | + uint16_t ret; | ||
118 | + | ||
119 | + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH); | ||
120 | + ret = lduw_p(haddr); | ||
121 | + clear_helper_retaddr(); | ||
122 | + if (get_memop(oi) & MO_BSWAP) { | ||
123 | + ret = bswap16(ret); | ||
124 | + } | ||
125 | + return ret; | ||
126 | +} | ||
127 | + | ||
128 | +uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr, | ||
129 | + MemOpIdx oi, uintptr_t ra) | ||
130 | +{ | ||
131 | + void *haddr; | ||
132 | + uint32_t ret; | ||
133 | + | ||
134 | + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH); | ||
135 | + ret = ldl_p(haddr); | ||
136 | + clear_helper_retaddr(); | ||
137 | + if (get_memop(oi) & MO_BSWAP) { | ||
138 | + ret = bswap32(ret); | ||
139 | + } | ||
140 | + return ret; | ||
141 | +} | ||
142 | + | ||
143 | +uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr, | ||
144 | + MemOpIdx oi, uintptr_t ra) | ||
145 | +{ | ||
146 | + void *haddr; | ||
147 | + uint64_t ret; | ||
148 | + | ||
149 | + validate_memop(oi, MO_BEUQ); | ||
150 | + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD); | ||
151 | + ret = ldq_p(haddr); | ||
152 | + clear_helper_retaddr(); | ||
153 | + if (get_memop(oi) & MO_BSWAP) { | ||
154 | + ret = bswap64(ret); | ||
155 | + } | ||
156 | + return ret; | ||
157 | +} | ||
158 | + | ||
159 | #include "ldst_common.c.inc" | ||
160 | |||
161 | /* | ||
162 | -- | ||
163 | 2.34.1 | diff view generated by jsdifflib |
1 | From: Taylor Simpson <tsimpson@quicinc.com> | 1 | Since TCG_TYPE_I32 values are kept sign-extended in registers, |
---|---|---|---|
2 | via ".w" instructions, we need not extend if the register matches. | ||
3 | This is already relied upon by comparisons. | ||
2 | 4 | ||
3 | Currently, helpers can only take up to 6 arguments. This patch adds the | 5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
4 | capability for up to 7 arguments. I have tested it with the Hexagon port | ||
5 | that I am preparing for submission. | ||
6 | |||
7 | Signed-off-by: Taylor Simpson <tsimpson@quicinc.com> | ||
8 | Message-Id: <1580942510-2820-1-git-send-email-tsimpson@quicinc.com> | ||
9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 6 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
10 | --- | 7 | --- |
11 | include/exec/helper-gen.h | 13 +++++++++++++ | 8 | tcg/loongarch64/tcg-target.c.inc | 4 +++- |
12 | include/exec/helper-head.h | 2 ++ | 9 | 1 file changed, 3 insertions(+), 1 deletion(-) |
13 | include/exec/helper-proto.h | 6 ++++++ | ||
14 | include/exec/helper-tcg.h | 7 +++++++ | ||
15 | 4 files changed, 28 insertions(+) | ||
16 | 10 | ||
17 | diff --git a/include/exec/helper-gen.h b/include/exec/helper-gen.h | 11 | diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc |
18 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/include/exec/helper-gen.h | 13 | --- a/tcg/loongarch64/tcg-target.c.inc |
20 | +++ b/include/exec/helper-gen.h | 14 | +++ b/tcg/loongarch64/tcg-target.c.inc |
21 | @@ -XXX,XX +XXX,XX @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ | 15 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg) |
22 | tcg_gen_callN(HELPER(name), dh_retvar(ret), 6, args); \ | 16 | |
17 | static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg) | ||
18 | { | ||
19 | - tcg_out_ext32s(s, ret, arg); | ||
20 | + if (ret != arg) { | ||
21 | + tcg_out_ext32s(s, ret, arg); | ||
22 | + } | ||
23 | } | 23 | } |
24 | 24 | ||
25 | +#define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7)\ | 25 | static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg) |
26 | +static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ | ||
27 | + dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3), \ | ||
28 | + dh_arg_decl(t4, 4), dh_arg_decl(t5, 5), dh_arg_decl(t6, 6), \ | ||
29 | + dh_arg_decl(t7, 7)) \ | ||
30 | +{ \ | ||
31 | + TCGTemp *args[7] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3), \ | ||
32 | + dh_arg(t4, 4), dh_arg(t5, 5), dh_arg(t6, 6), \ | ||
33 | + dh_arg(t7, 7) }; \ | ||
34 | + tcg_gen_callN(HELPER(name), dh_retvar(ret), 7, args); \ | ||
35 | +} | ||
36 | + | ||
37 | #include "helper.h" | ||
38 | #include "trace/generated-helpers.h" | ||
39 | #include "trace/generated-helpers-wrappers.h" | ||
40 | @@ -XXX,XX +XXX,XX @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ | ||
41 | #undef DEF_HELPER_FLAGS_4 | ||
42 | #undef DEF_HELPER_FLAGS_5 | ||
43 | #undef DEF_HELPER_FLAGS_6 | ||
44 | +#undef DEF_HELPER_FLAGS_7 | ||
45 | #undef GEN_HELPER | ||
46 | |||
47 | #endif /* HELPER_GEN_H */ | ||
48 | diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/include/exec/helper-head.h | ||
51 | +++ b/include/exec/helper-head.h | ||
52 | @@ -XXX,XX +XXX,XX @@ | ||
53 | DEF_HELPER_FLAGS_5(name, 0, ret, t1, t2, t3, t4, t5) | ||
54 | #define DEF_HELPER_6(name, ret, t1, t2, t3, t4, t5, t6) \ | ||
55 | DEF_HELPER_FLAGS_6(name, 0, ret, t1, t2, t3, t4, t5, t6) | ||
56 | +#define DEF_HELPER_7(name, ret, t1, t2, t3, t4, t5, t6, t7) \ | ||
57 | + DEF_HELPER_FLAGS_7(name, 0, ret, t1, t2, t3, t4, t5, t6, t7) | ||
58 | |||
59 | /* MAX_OPC_PARAM_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */ | ||
60 | |||
61 | diff --git a/include/exec/helper-proto.h b/include/exec/helper-proto.h | ||
62 | index XXXXXXX..XXXXXXX 100644 | ||
63 | --- a/include/exec/helper-proto.h | ||
64 | +++ b/include/exec/helper-proto.h | ||
65 | @@ -XXX,XX +XXX,XX @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ | ||
66 | dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ | ||
67 | dh_ctype(t4), dh_ctype(t5), dh_ctype(t6)); | ||
68 | |||
69 | +#define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7) \ | ||
70 | +dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ | ||
71 | + dh_ctype(t4), dh_ctype(t5), dh_ctype(t6), \ | ||
72 | + dh_ctype(t7)); | ||
73 | + | ||
74 | #include "helper.h" | ||
75 | #include "trace/generated-helpers.h" | ||
76 | #include "tcg-runtime.h" | ||
77 | @@ -XXX,XX +XXX,XX @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ | ||
78 | #undef DEF_HELPER_FLAGS_4 | ||
79 | #undef DEF_HELPER_FLAGS_5 | ||
80 | #undef DEF_HELPER_FLAGS_6 | ||
81 | +#undef DEF_HELPER_FLAGS_7 | ||
82 | |||
83 | #endif /* HELPER_PROTO_H */ | ||
84 | diff --git a/include/exec/helper-tcg.h b/include/exec/helper-tcg.h | ||
85 | index XXXXXXX..XXXXXXX 100644 | ||
86 | --- a/include/exec/helper-tcg.h | ||
87 | +++ b/include/exec/helper-tcg.h | ||
88 | @@ -XXX,XX +XXX,XX @@ | ||
89 | | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ | ||
90 | | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) }, | ||
91 | |||
92 | +#define DEF_HELPER_FLAGS_7(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6, t7) \ | ||
93 | + { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ | ||
94 | + .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ | ||
95 | + | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ | ||
96 | + | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) | dh_sizemask(t7, 7) }, | ||
97 | + | ||
98 | #include "helper.h" | ||
99 | #include "trace/generated-helpers.h" | ||
100 | #include "tcg-runtime.h" | ||
101 | @@ -XXX,XX +XXX,XX @@ | ||
102 | #undef DEF_HELPER_FLAGS_4 | ||
103 | #undef DEF_HELPER_FLAGS_5 | ||
104 | #undef DEF_HELPER_FLAGS_6 | ||
105 | +#undef DEF_HELPER_FLAGS_7 | ||
106 | |||
107 | #endif /* HELPER_TCG_H */ | ||
108 | -- | 26 | -- |
109 | 2.20.1 | 27 | 2.34.1 |
110 | 28 | ||
111 | 29 | diff view generated by jsdifflib |
1 | From: Max Filippov <jcmvbkbc@gmail.com> | 1 | Since TCG_TYPE_I32 values are kept sign-extended in registers, we need not |
---|---|---|---|
2 | extend if the register matches. This is already relied upon by comparisons. | ||
2 | 3 | ||
3 | When a breakpoint is inserted at location for which there's currently no | 4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
4 | virtual to physical translation no action is taken on CPU TB cache. If a | ||
5 | TB for that virtual address already exists but is not visible ATM the | ||
6 | breakpoint won't be hit next time an instruction at that address will be | ||
7 | executed. | ||
8 | |||
9 | Flush entire CPU TB cache in breakpoint_invalidate to force | ||
10 | re-translation of all TBs for the breakpoint address. | ||
11 | |||
12 | This change fixes the following scenario: | ||
13 | - linux user application is running | ||
14 | - a breakpoint is inserted from QEMU gdbstub for a user address that is | ||
15 | not currently present in the target CPU TLB | ||
16 | - an instruction at that address is executed, but the external debugger | ||
17 | doesn't get control. | ||
18 | |||
19 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
20 | Signed-off-by: Max Filippov <jcmvbkbc@gmail.com> | ||
21 | Message-Id: <20191127220602.10827-2-jcmvbkbc@gmail.com> | ||
22 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
23 | --- | 6 | --- |
24 | exec.c | 15 +++++++-------- | 7 | tcg/mips/tcg-target.c.inc | 4 +++- |
25 | 1 file changed, 7 insertions(+), 8 deletions(-) | 8 | 1 file changed, 3 insertions(+), 1 deletion(-) |
26 | 9 | ||
27 | diff --git a/exec.c b/exec.c | 10 | diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc |
28 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/exec.c | 12 | --- a/tcg/mips/tcg-target.c.inc |
30 | +++ b/exec.c | 13 | +++ b/tcg/mips/tcg-target.c.inc |
31 | @@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs) | 14 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rs) |
32 | 15 | ||
33 | static void breakpoint_invalidate(CPUState *cpu, target_ulong pc) | 16 | static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs) |
34 | { | 17 | { |
35 | - MemTxAttrs attrs; | 18 | - tcg_out_ext32s(s, rd, rs); |
36 | - hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs); | 19 | + if (rd != rs) { |
37 | - int asidx = cpu_asidx_from_attrs(cpu, attrs); | 20 | + tcg_out_ext32s(s, rd, rs); |
38 | - if (phys != -1) { | 21 | + } |
39 | - /* Locks grabbed by tb_invalidate_phys_addr */ | ||
40 | - tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as, | ||
41 | - phys | (pc & ~TARGET_PAGE_MASK), attrs); | ||
42 | - } | ||
43 | + /* | ||
44 | + * There may not be a virtual to physical translation for the pc | ||
45 | + * right now, but there may exist cached TB for this pc. | ||
46 | + * Flush the whole TB cache to force re-translation of such TBs. | ||
47 | + * This is heavyweight, but we're debugging anyway. | ||
48 | + */ | ||
49 | + tb_flush(cpu); | ||
50 | } | 22 | } |
51 | #endif | 23 | |
52 | 24 | static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs) | |
53 | -- | 25 | -- |
54 | 2.20.1 | 26 | 2.34.1 |
55 | 27 | ||
56 | 28 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | This is common code in most qemu_{ld,st} slow paths, moving two | |
2 | registers when there may be overlap between sources and destinations. | ||
3 | At present, this is only used by 32-bit hosts for 64-bit data, | ||
4 | but will shortly be used for more than that. | ||
5 | |||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
8 | --- | ||
9 | tcg/tcg.c | 69 ++++++++++++++++++++++++++++++++++++--- | ||
10 | tcg/arm/tcg-target.c.inc | 44 ++++++++++--------------- | ||
11 | tcg/i386/tcg-target.c.inc | 19 +++++------ | ||
12 | 3 files changed, 90 insertions(+), 42 deletions(-) | ||
13 | |||
14 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/tcg/tcg.c | ||
17 | +++ b/tcg/tcg.c | ||
18 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg); | ||
19 | static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg); | ||
20 | static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg); | ||
21 | static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long); | ||
22 | -static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2) | ||
23 | - __attribute__((unused)); | ||
24 | +static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2); | ||
25 | static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); | ||
26 | static void tcg_out_goto_tb(TCGContext *s, int which); | ||
27 | static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
28 | @@ -XXX,XX +XXX,XX @@ void tcg_raise_tb_overflow(TCGContext *s) | ||
29 | siglongjmp(s->jmp_trans, -2); | ||
30 | } | ||
31 | |||
32 | +typedef struct TCGMovExtend { | ||
33 | + TCGReg dst; | ||
34 | + TCGReg src; | ||
35 | + TCGType dst_type; | ||
36 | + TCGType src_type; | ||
37 | + MemOp src_ext; | ||
38 | +} TCGMovExtend; | ||
39 | + | ||
40 | /** | ||
41 | * tcg_out_movext -- move and extend | ||
42 | * @s: tcg context | ||
43 | @@ -XXX,XX +XXX,XX @@ void tcg_raise_tb_overflow(TCGContext *s) | ||
44 | * | ||
45 | * Move or extend @src into @dst, depending on @src_ext and the types. | ||
46 | */ | ||
47 | -static void __attribute__((unused)) | ||
48 | -tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst, | ||
49 | - TCGType src_type, MemOp src_ext, TCGReg src) | ||
50 | +static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst, | ||
51 | + TCGType src_type, MemOp src_ext, TCGReg src) | ||
52 | { | ||
53 | switch (src_ext) { | ||
54 | case MO_UB: | ||
55 | @@ -XXX,XX +XXX,XX @@ tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst, | ||
56 | } | ||
57 | } | ||
58 | |||
59 | +/* Minor variations on a theme, using a structure. */ | ||
60 | +static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i, | ||
61 | + TCGReg src) | ||
62 | +{ | ||
63 | + tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src); | ||
64 | +} | ||
65 | + | ||
66 | +static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i) | ||
67 | +{ | ||
68 | + tcg_out_movext1_new_src(s, i, i->src); | ||
69 | +} | ||
70 | + | ||
71 | +/** | ||
72 | + * tcg_out_movext2 -- move and extend two pair | ||
73 | + * @s: tcg context | ||
74 | + * @i1: first move description | ||
75 | + * @i2: second move description | ||
76 | + * @scratch: temporary register, or -1 for none | ||
77 | + * | ||
78 | + * As tcg_out_movext, for both @i1 and @i2, caring for overlap | ||
79 | + * between the sources and destinations. | ||
80 | + */ | ||
81 | + | ||
82 | +static void __attribute__((unused)) | ||
83 | +tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1, | ||
84 | + const TCGMovExtend *i2, int scratch) | ||
85 | +{ | ||
86 | + TCGReg src1 = i1->src; | ||
87 | + TCGReg src2 = i2->src; | ||
88 | + | ||
89 | + if (i1->dst != src2) { | ||
90 | + tcg_out_movext1(s, i1); | ||
91 | + tcg_out_movext1(s, i2); | ||
92 | + return; | ||
93 | + } | ||
94 | + if (i2->dst == src1) { | ||
95 | + TCGType src1_type = i1->src_type; | ||
96 | + TCGType src2_type = i2->src_type; | ||
97 | + | ||
98 | + if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) { | ||
99 | + /* The data is now in the correct registers, now extend. */ | ||
100 | + src1 = i2->src; | ||
101 | + src2 = i1->src; | ||
102 | + } else { | ||
103 | + tcg_debug_assert(scratch >= 0); | ||
104 | + tcg_out_mov(s, src1_type, scratch, src1); | ||
105 | + src1 = scratch; | ||
106 | + } | ||
107 | + } | ||
108 | + tcg_out_movext1_new_src(s, i2, src2); | ||
109 | + tcg_out_movext1_new_src(s, i1, src1); | ||
110 | +} | ||
111 | + | ||
112 | #define C_PFX1(P, A) P##A | ||
113 | #define C_PFX2(P, A, B) P##A##_##B | ||
114 | #define C_PFX3(P, A, B, C) P##A##_##B##_##C | ||
115 | diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc | ||
116 | index XXXXXXX..XXXXXXX 100644 | ||
117 | --- a/tcg/arm/tcg-target.c.inc | ||
118 | +++ b/tcg/arm/tcg-target.c.inc | ||
119 | @@ -XXX,XX +XXX,XX @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi, | ||
120 | |||
121 | static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
122 | { | ||
123 | - TCGReg argreg, datalo, datahi; | ||
124 | + TCGReg argreg; | ||
125 | MemOpIdx oi = lb->oi; | ||
126 | MemOp opc = get_memop(oi); | ||
127 | |||
128 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) | ||
129 | /* Use the canonical unsigned helpers and minimize icache usage. */ | ||
130 | tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]); | ||
131 | |||
132 | - datalo = lb->datalo_reg; | ||
133 | - datahi = lb->datahi_reg; | ||
134 | if ((opc & MO_SIZE) == MO_64) { | ||
135 | - if (datalo != TCG_REG_R1) { | ||
136 | - tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0); | ||
137 | - tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); | ||
138 | - } else if (datahi != TCG_REG_R0) { | ||
139 | - tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); | ||
140 | - tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0); | ||
141 | - } else { | ||
142 | - tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0); | ||
143 | - tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1); | ||
144 | - tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP); | ||
145 | - } | ||
146 | + TCGMovExtend ext[2] = { | ||
147 | + { .dst = lb->datalo_reg, .dst_type = TCG_TYPE_I32, | ||
148 | + .src = TCG_REG_R0, .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, | ||
149 | + { .dst = lb->datahi_reg, .dst_type = TCG_TYPE_I32, | ||
150 | + .src = TCG_REG_R1, .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, | ||
151 | + }; | ||
152 | + tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP); | ||
153 | } else { | ||
154 | - tcg_out_movext(s, TCG_TYPE_I32, datalo, | ||
155 | + tcg_out_movext(s, TCG_TYPE_I32, lb->datalo_reg, | ||
156 | TCG_TYPE_I32, opc & MO_SSIZE, TCG_REG_R0); | ||
157 | } | ||
158 | |||
159 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l) | ||
160 | |||
161 | if (TARGET_LONG_BITS == 64) { | ||
162 | /* 64-bit target address is aligned into R2:R3. */ | ||
163 | - if (l->addrhi_reg != TCG_REG_R2) { | ||
164 | - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg); | ||
165 | - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg); | ||
166 | - } else if (l->addrlo_reg != TCG_REG_R3) { | ||
167 | - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg); | ||
168 | - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg); | ||
169 | - } else { | ||
170 | - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, TCG_REG_R2); | ||
171 | - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, TCG_REG_R3); | ||
172 | - tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, TCG_REG_R1); | ||
173 | - } | ||
174 | + TCGMovExtend ext[2] = { | ||
175 | + { .dst = TCG_REG_R2, .dst_type = TCG_TYPE_I32, | ||
176 | + .src = l->addrlo_reg, | ||
177 | + .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, | ||
178 | + { .dst = TCG_REG_R3, .dst_type = TCG_TYPE_I32, | ||
179 | + .src = l->addrhi_reg, | ||
180 | + .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, | ||
181 | + }; | ||
182 | + tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP); | ||
183 | } else { | ||
184 | tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg); | ||
185 | } | ||
186 | diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc | ||
187 | index XXXXXXX..XXXXXXX 100644 | ||
188 | --- a/tcg/i386/tcg-target.c.inc | ||
189 | +++ b/tcg/i386/tcg-target.c.inc | ||
190 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) | ||
191 | { | ||
192 | MemOpIdx oi = l->oi; | ||
193 | MemOp opc = get_memop(oi); | ||
194 | - TCGReg data_reg; | ||
195 | tcg_insn_unit **label_ptr = &l->label_ptr[0]; | ||
196 | |||
197 | /* resolve label address */ | ||
198 | @@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) | ||
199 | |||
200 | tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); | ||
201 | |||
202 | - data_reg = l->datalo_reg; | ||
203 | if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { | ||
204 | - if (data_reg == TCG_REG_EDX) { | ||
205 | - /* xchg %edx, %eax */ | ||
206 | - tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0); | ||
207 | - tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX); | ||
208 | - } else { | ||
209 | - tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX); | ||
210 | - tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX); | ||
211 | - } | ||
212 | + TCGMovExtend ext[2] = { | ||
213 | + { .dst = l->datalo_reg, .dst_type = TCG_TYPE_I32, | ||
214 | + .src = TCG_REG_EAX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, | ||
215 | + { .dst = l->datahi_reg, .dst_type = TCG_TYPE_I32, | ||
216 | + .src = TCG_REG_EDX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL }, | ||
217 | + }; | ||
218 | + tcg_out_movext2(s, &ext[0], &ext[1], -1); | ||
219 | } else { | ||
220 | - tcg_out_movext(s, l->type, data_reg, | ||
221 | + tcg_out_movext(s, l->type, l->datalo_reg, | ||
222 | TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_EAX); | ||
223 | } | ||
224 | |||
225 | -- | ||
226 | 2.34.1 | ||
227 | |||
228 | diff view generated by jsdifflib |