The following changes since commit 7c18f2d663521f1b31b821a13358ce38075eaf7d:

  Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging (2023-04-29 23:07:17 +0100)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230502

for you to fetch changes up to bdc7fba1c5a29ae218b45353daac9308fe1aae82:

  tcg: Introduce tcg_out_movext2 (2023-05-02 12:15:41 +0100)

----------------------------------------------------------------
Misc tcg-related patch queue.

----------------------------------------------------------------
Dickon Hood (1):
      qemu/bitops.h: Limit rotate amounts

Kiran Ostrolenk (1):
      qemu/host-utils.h: Add clz and ctz functions for lower-bit integers

Nazar Kazakov (2):
      tcg: Add tcg_gen_gvec_andcs
      tcg: Add tcg_gen_gvec_rotrs

Richard Henderson (7):
      softmmu: Tidy dirtylimit_dirty_ring_full_time
      qemu/int128: Re-shuffle Int128Alias members
      migration/xbzrle: Use __attribute__((target)) for avx512
      accel/tcg: Add cpu_ld*_code_mmu
      tcg/loongarch64: Conditionalize tcg_out_exts_i32_i64
      tcg/mips: Conditionalize tcg_out_exts_i32_i64
      tcg: Introduce tcg_out_movext2

Weiwei Li (1):
      accel/tcg: Uncache the host address for instruction fetch when tlb size < 1

 meson.build                      |  5 +--
 accel/tcg/tcg-runtime.h          |  1 +
 include/exec/cpu_ldst.h          |  9 ++++++
 include/qemu/bitops.h            | 24 +++++++++-----
 include/qemu/host-utils.h        | 54 +++++++++++++++++++++++++++++++
 include/qemu/int128.h            |  4 +--
 include/tcg/tcg-op-gvec.h        |  4 +++
 accel/tcg/cputlb.c               | 53 ++++++++++++++++++++++++++++++
 accel/tcg/tcg-runtime-gvec.c     | 11 +++++++
 accel/tcg/user-exec.c            | 58 +++++++++++++++++++++++++++++++++
 migration/xbzrle.c               |  9 +++---
 softmmu/dirtylimit.c             | 15 ++++++---
 tcg/tcg-op-gvec.c                | 28 ++++++++++++++++
 tcg/tcg.c                        | 69 +++++++++++++++++++++++++++++++++++---
 tcg/arm/tcg-target.c.inc         | 44 +++++++++++--------------
 tcg/i386/tcg-target.c.inc        | 19 +++++------
 tcg/loongarch64/tcg-target.c.inc |  4 ++-
 tcg/mips/tcg-target.c.inc        |  4 ++-
 18 files changed, 347 insertions(+), 68 deletions(-)
Drop inline marker: let compiler decide.

Change return type to uint64_t: this matches the computation in the
return statement and the local variable assignment in the caller.

Rename local to dirty_ring_size_MB to fix typo.
Simplify conversion to MiB via qemu_target_page_bits and right shift.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 softmmu/dirtylimit.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index XXXXXXX..XXXXXXX 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -XXX,XX +XXX,XX @@ bool dirtylimit_vcpu_index_valid(int cpu_index)
             cpu_index >= ms->smp.max_cpus);
 }

-static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
+static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
 {
     static uint64_t max_dirtyrate;
-    uint32_t dirty_ring_size = kvm_dirty_ring_size();
-    uint64_t dirty_ring_size_meory_MB =
-        dirty_ring_size * qemu_target_page_size() >> 20;
+    unsigned target_page_bits = qemu_target_page_bits();
+    uint64_t dirty_ring_size_MB;
+
+    /* So far, the largest (non-huge) page size is 64k, i.e. 16 bits. */
+    assert(target_page_bits < 20);
+
+    /* Convert ring size (pages) to MiB (2**20). */
+    dirty_ring_size_MB = kvm_dirty_ring_size() >> (20 - target_page_bits);

     if (max_dirtyrate < dirtyrate) {
         max_dirtyrate = dirtyrate;
     }

-    return dirty_ring_size_meory_MB * 1000000 / max_dirtyrate;
+    return dirty_ring_size_MB * 1000000 / max_dirtyrate;
 }

 static inline bool dirtylimit_done(uint64_t quota,
--
2.34.1

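[Not from the series: a standalone sketch of the arithmetic behind the
new conversion. With the ring size in pages and target_page_bits < 20,
multiplying by the page size and then shifting down to MiB collapses
into a single right shift; the ring and page sizes below are made up.]

    #include <assert.h>
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t ring_size = 4096;   /* pages, hypothetical value */
        unsigned page_bits = 12;     /* 4 KiB target pages */

        /* pages * 2**page_bits bytes, then / 2**20 bytes per MiB ... */
        uint64_t by_multiply = (ring_size << page_bits) >> 20;
        /* ... equals one shift while page_bits stays below 20. */
        uint64_t by_shift = ring_size >> (20 - page_bits);

        assert(by_multiply == by_shift);
        printf("%" PRIu64 " MiB\n", by_shift);   /* prints: 16 MiB */
        return 0;
    }
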
From: Weiwei Li <liweiwei@iscas.ac.cn>

When a PMP entry overlaps part of the page, we set the tlb_size to 1,
so that the address in the TLB entry is set with TLB_INVALID_MASK and
the next access goes through tlb_fill again. However, this does not
work in tb_gen_code() => get_page_addr_code_hostp(): the TLB host
address is cached, and the following instructions can use this host
address directly, which may bypass the PMP-related checks.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1542

Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230422130329.23555-6-liweiwei@iscas.ac.cn>
---
 accel/tcg/cputlb.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
     if (p == NULL) {
         return -1;
     }
+
+    if (full->lg_page_size < TARGET_PAGE_BITS) {
+        return -1;
+    }
+
     if (hostp) {
         *hostp = p;
     }
--
2.34.1

From: Dickon Hood <dickon.hood@codethink.co.uk>

Rotates have been fixed up to only allow for reasonable rotate amounts
(i.e., no rotates >7 on an 8-bit value, etc.). This fixes a problem
with the riscv vector rotate instructions.

Signed-off-by: Dickon Hood <dickon.hood@codethink.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230428144757.57530-9-lawrence.hunter@codethink.co.uk>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/qemu/bitops.h | 24 ++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/bitops.h
+++ b/include/qemu/bitops.h
@@ -XXX,XX +XXX,XX @@ static inline unsigned long find_first_zero_bit(const unsigned long *addr,
  */
 static inline uint8_t rol8(uint8_t word, unsigned int shift)
 {
-    return (word << shift) | (word >> ((8 - shift) & 7));
+    shift &= 7;
+    return (word << shift) | (word >> (8 - shift));
 }

 /**
@@ -XXX,XX +XXX,XX @@ static inline uint8_t rol8(uint8_t word, unsigned int shift)
  */
 static inline uint8_t ror8(uint8_t word, unsigned int shift)
 {
-    return (word >> shift) | (word << ((8 - shift) & 7));
+    shift &= 7;
+    return (word >> shift) | (word << (8 - shift));
 }

 /**
@@ -XXX,XX +XXX,XX @@ static inline uint8_t ror8(uint8_t word, unsigned int shift)
  */
 static inline uint16_t rol16(uint16_t word, unsigned int shift)
 {
-    return (word << shift) | (word >> ((16 - shift) & 15));
+    shift &= 15;
+    return (word << shift) | (word >> (16 - shift));
 }

 /**
@@ -XXX,XX +XXX,XX @@ static inline uint16_t rol16(uint16_t word, unsigned int shift)
  */
 static inline uint16_t ror16(uint16_t word, unsigned int shift)
 {
-    return (word >> shift) | (word << ((16 - shift) & 15));
+    shift &= 15;
+    return (word >> shift) | (word << (16 - shift));
 }

 /**
@@ -XXX,XX +XXX,XX @@ static inline uint16_t ror16(uint16_t word, unsigned int shift)
  */
 static inline uint32_t rol32(uint32_t word, unsigned int shift)
 {
-    return (word << shift) | (word >> ((32 - shift) & 31));
+    shift &= 31;
+    return (word << shift) | (word >> (32 - shift));
 }

 /**
@@ -XXX,XX +XXX,XX @@ static inline uint32_t rol32(uint32_t word, unsigned int shift)
  */
 static inline uint32_t ror32(uint32_t word, unsigned int shift)
 {
-    return (word >> shift) | (word << ((32 - shift) & 31));
+    shift &= 31;
+    return (word >> shift) | (word << (32 - shift));
 }

 /**
@@ -XXX,XX +XXX,XX @@ static inline uint32_t ror32(uint32_t word, unsigned int shift)
  */
 static inline uint64_t rol64(uint64_t word, unsigned int shift)
 {
-    return (word << shift) | (word >> ((64 - shift) & 63));
+    shift &= 63;
+    return (word << shift) | (word >> (64 - shift));
 }

 /**
@@ -XXX,XX +XXX,XX @@ static inline uint64_t rol64(uint64_t word, unsigned int shift)
  */
 static inline uint64_t ror64(uint64_t word, unsigned int shift)
 {
-    return (word >> shift) | (word << ((64 - shift) & 63));
+    shift &= 63;
+    return (word >> shift) | (word << (64 - shift));
 }

 /**
--
2.34.1

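[Not from the series: a self-contained check of the masked form,
mirroring the new rol8. Reducing the amount first keeps every shift
strictly below the promoted operand width, so out-of-range rotate
amounts now wrap modulo the element size instead of misbehaving.]

    #include <assert.h>
    #include <stdint.h>

    static uint8_t rol8(uint8_t word, unsigned int shift)
    {
        shift &= 7;
        return (word << shift) | (word >> (8 - shift));
    }

    int main(void)
    {
        assert(rol8(0x81, 1) == 0x03);           /* plain rotate */
        assert(rol8(0x81, 0) == 0x81);           /* word >> 8 is safe here:
                                                    uint8_t promotes to int */
        assert(rol8(0x81, 9) == rol8(0x81, 1));  /* amounts reduce mod 8 */
        return 0;
    }
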
From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>

This is for use in the RISC-V vclz and vctz instructions (implemented
in a following commit).

Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230428144757.57530-11-lawrence.hunter@codethink.co.uk>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/qemu/host-utils.h | 54 +++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
 }
 #endif

+/**
+ * clz8 - count leading zeros in an 8-bit value.
+ * @val: The value to search
+ *
+ * Returns 8 if the value is zero. Note that the GCC builtin is
+ * undefined if the value is zero.
+ *
+ * Note that the GCC builtin will upcast its argument to an `unsigned int`
+ * so this function subtracts off the number of prepended zeroes.
+ */
+static inline int clz8(uint8_t val)
+{
+    return val ? __builtin_clz(val) - 24 : 8;
+}
+
+/**
+ * clz16 - count leading zeros in a 16-bit value.
+ * @val: The value to search
+ *
+ * Returns 16 if the value is zero. Note that the GCC builtin is
+ * undefined if the value is zero.
+ *
+ * Note that the GCC builtin will upcast its argument to an `unsigned int`
+ * so this function subtracts off the number of prepended zeroes.
+ */
+static inline int clz16(uint16_t val)
+{
+    return val ? __builtin_clz(val) - 16 : 16;
+}
+
 /**
  * clz32 - count leading zeros in a 32-bit value.
  * @val: The value to search
@@ -XXX,XX +XXX,XX @@ static inline int clo64(uint64_t val)
     return clz64(~val);
 }

+/**
+ * ctz8 - count trailing zeros in an 8-bit value.
+ * @val: The value to search
+ *
+ * Returns 8 if the value is zero. Note that the GCC builtin is
+ * undefined if the value is zero.
+ */
+static inline int ctz8(uint8_t val)
+{
+    return val ? __builtin_ctz(val) : 8;
+}
+
+/**
+ * ctz16 - count trailing zeros in a 16-bit value.
+ * @val: The value to search
+ *
+ * Returns 16 if the value is zero. Note that the GCC builtin is
+ * undefined if the value is zero.
+ */
+static inline int ctz16(uint16_t val)
+{
+    return val ? __builtin_ctz(val) : 16;
+}
+
 /**
  * ctz32 - count trailing zeros in a 32-bit value.
  * @val: The value to search
--
2.34.1

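[Not from the series: the upcast correction is easy to check in
isolation. __builtin_clz() operates on unsigned int, so an 8-bit value
gains 24 phantom leading zeros; trailing zeros need no correction.]

    #include <assert.h>
    #include <stdint.h>

    static int clz8(uint8_t val)
    {
        return val ? __builtin_clz(val) - 24 : 8;
    }

    static int ctz8(uint8_t val)
    {
        return val ? __builtin_ctz(val) : 8;
    }

    int main(void)
    {
        assert(clz8(0x10) == 3);   /* 0001 0000: three leading zeros */
        assert(ctz8(0x10) == 4);   /* four trailing zeros */
        assert(clz8(0) == 8);      /* the builtin alone is undefined here */
        assert(ctz8(0) == 8);
        return 0;
    }
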
From: Nazar Kazakov <nazar.kazakov@codethink.co.uk>

Add tcg expander and helper functions for and-complement
vector with scalar operand.

Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
Message-Id: <20230428144757.57530-10-lawrence.hunter@codethink.co.uk>
[rth: Split out of larger patch.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-runtime.h      |  1 +
 include/tcg/tcg-op-gvec.h    |  2 ++
 accel/tcg/tcg-runtime-gvec.c | 11 +++++++++++
 tcg/tcg-op-gvec.c            | 17 +++++++++++++++++
 4 files changed, 31 insertions(+)

diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_nor, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_eqv, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

 DEF_HELPER_FLAGS_4(gvec_ands, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_andcs, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(gvec_xors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(gvec_ors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_ori(unsigned vece, uint32_t dofs, uint32_t aofs,

 void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs,
                        TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
 void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs,
                        TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
 void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,

diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-runtime-gvec.c
+++ b/accel/tcg/tcg-runtime-gvec.c
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
     clear_high(d, oprsz, desc);
 }

+void HELPER(gvec_andcs)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+    intptr_t oprsz = simd_oprsz(desc);
+    intptr_t i;
+
+    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & ~b;
+    }
+    clear_high(d, oprsz, desc);
+}
+
 void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
 {
     intptr_t oprsz = simd_oprsz(desc);

diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_andi(unsigned vece, uint32_t dofs, uint32_t aofs,
     tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ands);
 }

+void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
+{
+    static GVecGen2s g = {
+        .fni8 = tcg_gen_andc_i64,
+        .fniv = tcg_gen_andc_vec,
+        .fno = gen_helper_gvec_andcs,
+        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+        .vece = MO_64
+    };
+
+    TCGv_i64 tmp = tcg_temp_ebb_new_i64();
+    tcg_gen_dup_i64(vece, tmp, c);
+    tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &g);
+    tcg_temp_free_i64(tmp);
+}
+
 static const GVecGen2s gop_xors = {
     .fni8 = tcg_gen_xor_i64,
     .fniv = tcg_gen_xor_vec,
--
2.34.1

From: Nazar Kazakov <nazar.kazakov@codethink.co.uk>

Add tcg expander and helper functions for rotate right
vector with scalar operand.

Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
Message-Id: <20230428144757.57530-10-lawrence.hunter@codethink.co.uk>
[rth: Split out of larger patch; mask rotation count.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-op-gvec.h |  2 ++
 tcg/tcg-op-gvec.c         | 11 +++++++++++
 2 files changed, 13 insertions(+)

diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg-op-gvec.h
+++ b/include/tcg/tcg-op-gvec.h
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs,
                        TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
 void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
                         TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_rotrs(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);

 /*
  * Perform vector shift by vector element, modulo the element size.
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
     do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
 }

+void tcg_gen_gvec_rotrs(unsigned vece, uint32_t dofs, uint32_t aofs,
+                        TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz)
+{
+    TCGv_i32 tmp = tcg_temp_ebb_new_i32();
+
+    tcg_gen_neg_i32(tmp, shift);
+    tcg_gen_andi_i32(tmp, tmp, (8 << vece) - 1);
+    tcg_gen_gvec_rotls(vece, dofs, aofs, tmp, oprsz, maxsz);
+    tcg_temp_free_i32(tmp);
+}
+
 /*
  * Expand D = A << (B % element bits)
  *
--
2.34.1

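[Not from the series: the expander relies on the identity
rotr(x, s) == rotl(x, -s mod width), which is exactly what the
negate-and-mask of the shift count computes. A scalar sketch:]

    #include <assert.h>
    #include <stdint.h>

    static uint32_t rotl32(uint32_t x, unsigned s)
    {
        s &= 31;
        return (x << s) | (x >> (-s & 31));
    }

    static uint32_t rotr32(uint32_t x, unsigned s)
    {
        /* Same trick as tcg_gen_gvec_rotrs: negate, then mask. */
        return rotl32(x, -s & 31);
    }

    int main(void)
    {
        assert(rotr32(0x80000001u, 4) == 0x18000000u);
        assert(rotr32(0xdeadbeefu, 0) == 0xdeadbeefu);
        return 0;
    }
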
Clang 14, with --enable-tcg-interpreter, errors with

include/qemu/int128.h:487:16: error: alignment of field 'i' (128 bits)
  does not match the alignment of the first field in transparent union;
  transparent_union attribute ignored [-Werror,-Wignored-attributes]
        __int128_t i;
                   ^
include/qemu/int128.h:486:12: note: alignment of first field is 64 bits
        Int128 s;
               ^
1 error generated.

By placing the __uint128_t member first, this is avoided.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20230501204625.277361-1-richard.henderson@linaro.org>
---
 include/qemu/int128.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -XXX,XX +XXX,XX @@ static inline void bswap128s(Int128 *s)
  */
 #ifdef CONFIG_INT128
 typedef union {
-    Int128 s;
-    __int128_t i;
     __uint128_t u;
+    __int128_t i;
+    Int128 s;
 } Int128Alias __attribute__((transparent_union));
 #else
 typedef Int128 Int128Alias;
--
2.34.1

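[Not from the series: a minimal reproducer of the diagnostic, assuming
clang checks later members only against the alignment of the first.
FakeInt128 is an invented stand-in for QEMU's two-word Int128.]

    #include <stdint.h>

    typedef struct { uint64_t lo, hi; } FakeInt128;  /* 8-byte aligned */

    /* clang 14: "transparent_union attribute ignored"; -Werror fails. */
    typedef union {
        FakeInt128 s;    /* first member: 64-bit alignment */
        __int128_t i;    /* 128-bit alignment mismatches the first */
    } BadAlias __attribute__((transparent_union));

    /* Most-aligned member first: accepted. */
    typedef union {
        __int128_t i;
        FakeInt128 s;
    } GoodAlias __attribute__((transparent_union));
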
Use the attribute, which is supported by clang, instead of
the #pragma, which is not supported and, for some reason,
also not detected by the meson probe, so we fail by -Werror.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20230501210555.289806-1-richard.henderson@linaro.org>
---
 meson.build        | 5 +----
 migration/xbzrle.c | 9 ++++-----
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/meson.build b/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/meson.build
+++ b/meson.build
@@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \
 config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \
   .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512BW') \
   .require(cc.links('''
-    #pragma GCC push_options
-    #pragma GCC target("avx512bw")
     #include <cpuid.h>
     #include <immintrin.h>
-    static int bar(void *a) {
-
+    static int __attribute__((target("avx512bw"))) bar(void *a) {
       __m512i *x = a;
       __m512i res= _mm512_abs_epi8(*x);
       return res[1];
diff --git a/migration/xbzrle.c b/migration/xbzrle.c
index XXXXXXX..XXXXXXX 100644
--- a/migration/xbzrle.c
+++ b/migration/xbzrle.c
@@ -XXX,XX +XXX,XX @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
 }

 #if defined(CONFIG_AVX512BW_OPT)
-#pragma GCC push_options
-#pragma GCC target("avx512bw")
 #include <immintrin.h>
-int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
-                                uint8_t *dst, int dlen)
+
+int __attribute__((target("avx512bw")))
+xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+                            uint8_t *dst, int dlen)
 {
     uint32_t zrun_len = 0, nzrun_len = 0;
     int d = 0, i = 0, num = 0;
@@ -XXX,XX +XXX,XX @@ int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
     }
     return d;
 }
-#pragma GCC pop_options
 #endif
--
2.34.1

At least RISC-V has the need to be able to perform a read
using execute permissions, outside of translation.
Add helpers to facilitate this.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Tested-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Message-Id: <20230325105429.1142530-9-richard.henderson@linaro.org>
Message-Id: <20230412114333.118895-9-richard.henderson@linaro.org>
---
 include/exec/cpu_ldst.h |  9 +++++++
 accel/tcg/cputlb.c      | 48 ++++++++++++++++++++++++++++++++++
 accel/tcg/user-exec.c   | 58 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 115 insertions(+)

diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -XXX,XX +XXX,XX @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
 # define cpu_stq_mmu cpu_stq_le_mmu
 #endif

+uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
+                         MemOpIdx oi, uintptr_t ra);
+uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
+                          MemOpIdx oi, uintptr_t ra);
+uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
+                          MemOpIdx oi, uintptr_t ra);
+uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
+                          MemOpIdx oi, uintptr_t ra);
+
 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr);
 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr);
 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr);
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
     MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true));
     return full_ldq_code(env, addr, oi, 0);
 }
+
+uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
+                         MemOpIdx oi, uintptr_t retaddr)
+{
+    return full_ldub_code(env, addr, oi, retaddr);
+}
+
+uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
+                          MemOpIdx oi, uintptr_t retaddr)
+{
+    MemOp mop = get_memop(oi);
+    int idx = get_mmuidx(oi);
+    uint16_t ret;
+
+    ret = full_lduw_code(env, addr, make_memop_idx(MO_TEUW, idx), retaddr);
+    if ((mop & MO_BSWAP) != MO_TE) {
+        ret = bswap16(ret);
+    }
+    return ret;
+}
+
+uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
+                          MemOpIdx oi, uintptr_t retaddr)
+{
+    MemOp mop = get_memop(oi);
+    int idx = get_mmuidx(oi);
+    uint32_t ret;
+
+    ret = full_ldl_code(env, addr, make_memop_idx(MO_TEUL, idx), retaddr);
+    if ((mop & MO_BSWAP) != MO_TE) {
+        ret = bswap32(ret);
+    }
+    return ret;
+}
+
+uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
+                          MemOpIdx oi, uintptr_t retaddr)
+{
+    MemOp mop = get_memop(oi);
+    int idx = get_mmuidx(oi);
+    uint64_t ret;
+
+    ret = full_ldq_code(env, addr, make_memop_idx(MO_TEUQ, idx), retaddr);
+    if ((mop & MO_BSWAP) != MO_TE) {
+        ret = bswap64(ret);
+    }
+    return ret;
+}
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr)
     return ret;
 }

+uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
+                         MemOpIdx oi, uintptr_t ra)
+{
+    void *haddr;
+    uint8_t ret;
+
+    haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
+    ret = ldub_p(haddr);
+    clear_helper_retaddr();
+    return ret;
+}
+
+uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
+                          MemOpIdx oi, uintptr_t ra)
+{
+    void *haddr;
+    uint16_t ret;
+
+    haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
+    ret = lduw_p(haddr);
+    clear_helper_retaddr();
+    if (get_memop(oi) & MO_BSWAP) {
+        ret = bswap16(ret);
+    }
+    return ret;
+}
+
+uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
+                          MemOpIdx oi, uintptr_t ra)
+{
+    void *haddr;
+    uint32_t ret;
+
+    haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
+    ret = ldl_p(haddr);
+    clear_helper_retaddr();
+    if (get_memop(oi) & MO_BSWAP) {
+        ret = bswap32(ret);
+    }
+    return ret;
+}
+
+uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
+                          MemOpIdx oi, uintptr_t ra)
+{
+    void *haddr;
+    uint64_t ret;
+
+    validate_memop(oi, MO_BEUQ);
+    haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
+    ret = ldq_p(haddr);
+    clear_helper_retaddr();
+    if (get_memop(oi) & MO_BSWAP) {
+        ret = bswap64(ret);
+    }
+    return ret;
+}
+
 #include "ldst_common.c.inc"

 /*
--
2.34.1

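[Not from the series: a hypothetical call site. reread_insn() and its
mmu_idx/ra parameters are invented for illustration; make_memop_idx()
and MO_TEUL are the existing interfaces the new helpers expect.]

    /* Re-read the 32-bit instruction at @pc with execute permission,
     * outside of translation; @ra is the host return address for
     * unwinding, or 0 when not called from generated code. */
    static uint32_t reread_insn(CPUArchState *env, abi_ptr pc,
                                int mmu_idx, uintptr_t ra)
    {
        MemOpIdx oi = make_memop_idx(MO_TEUL, mmu_idx);
        return cpu_ldl_code_mmu(env, pc, oi, ra);
    }
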
Since TCG_TYPE_I32 values are kept sign-extended in registers,
via ".w" instructions, we need not extend if the register matches.
This is already relied upon by comparisons.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target.c.inc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg)

 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
 {
-    tcg_out_ext32s(s, ret, arg);
+    if (ret != arg) {
+        tcg_out_ext32s(s, ret, arg);
+    }
 }

 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
--
2.34.1

Since TCG_TYPE_I32 values are kept sign-extended in registers, we need not
extend if the register matches. This is already relied upon by comparisons.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target.c.inc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rs)

 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs)
 {
-    tcg_out_ext32s(s, rd, rs);
+    if (rd != rs) {
+        tcg_out_ext32s(s, rd, rs);
+    }
 }

 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs)
--
2.34.1

This is common code in most qemu_{ld,st} slow paths, moving two
registers when there may be overlap between sources and destinations.
At present, this is only used by 32-bit hosts for 64-bit data,
but will shortly be used for more than that.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c                 | 69 ++++++++++++++++++++++++++++++++++++---
 tcg/arm/tcg-target.c.inc  | 44 ++++++++++---------------
 tcg/i386/tcg-target.c.inc | 19 +++++------
 3 files changed, 90 insertions(+), 42 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
-static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
-    __attribute__((unused));
+static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
 static void tcg_out_goto_tb(TCGContext *s, int which);
 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
@@ -XXX,XX +XXX,XX @@ void tcg_raise_tb_overflow(TCGContext *s)
     siglongjmp(s->jmp_trans, -2);
 }

+typedef struct TCGMovExtend {
+    TCGReg dst;
+    TCGReg src;
+    TCGType dst_type;
+    TCGType src_type;
+    MemOp src_ext;
+} TCGMovExtend;
+
 /**
  * tcg_out_movext -- move and extend
  * @s: tcg context
@@ -XXX,XX +XXX,XX @@ void tcg_raise_tb_overflow(TCGContext *s)
  *
  * Move or extend @src into @dst, depending on @src_ext and the types.
  */
-static void __attribute__((unused))
-tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
-               TCGType src_type, MemOp src_ext, TCGReg src)
+static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
+                           TCGType src_type, MemOp src_ext, TCGReg src)
 {
     switch (src_ext) {
     case MO_UB:
@@ -XXX,XX +XXX,XX @@ tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
     }
 }

+/* Minor variations on a theme, using a structure. */
+static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
+                                    TCGReg src)
+{
+    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
+}
+
+static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
+{
+    tcg_out_movext1_new_src(s, i, i->src);
+}
+
+/**
+ * tcg_out_movext2 -- move and extend two pair
+ * @s: tcg context
+ * @i1: first move description
+ * @i2: second move description
+ * @scratch: temporary register, or -1 for none
+ *
+ * As tcg_out_movext, for both @i1 and @i2, caring for overlap
+ * between the sources and destinations.
+ */
+
+static void __attribute__((unused))
+tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
+                const TCGMovExtend *i2, int scratch)
+{
+    TCGReg src1 = i1->src;
+    TCGReg src2 = i2->src;
+
+    if (i1->dst != src2) {
+        tcg_out_movext1(s, i1);
+        tcg_out_movext1(s, i2);
+        return;
+    }
+    if (i2->dst == src1) {
+        TCGType src1_type = i1->src_type;
+        TCGType src2_type = i2->src_type;
+
+        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
+            /* The data is now in the correct registers, now extend. */
+            src1 = i2->src;
+            src2 = i1->src;
+        } else {
+            tcg_debug_assert(scratch >= 0);
+            tcg_out_mov(s, src1_type, scratch, src1);
+            src1 = scratch;
+        }
+    }
+    tcg_out_movext1_new_src(s, i2, src2);
+    tcg_out_movext1_new_src(s, i1, src1);
+}
+
 #define C_PFX1(P, A) P##A
 #define C_PFX2(P, A, B) P##A##_##B
 #define C_PFX3(P, A, B, C) P##A##_##B##_##C
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,

 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-    TCGReg argreg, datalo, datahi;
+    TCGReg argreg;
     MemOpIdx oi = lb->oi;
     MemOp opc = get_memop(oi);

@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     /* Use the canonical unsigned helpers and minimize icache usage. */
     tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);

-    datalo = lb->datalo_reg;
-    datahi = lb->datahi_reg;
     if ((opc & MO_SIZE) == MO_64) {
-        if (datalo != TCG_REG_R1) {
-            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
-            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
-        } else if (datahi != TCG_REG_R0) {
-            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
-            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
-        } else {
-            tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
-            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
-            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
-        }
+        TCGMovExtend ext[2] = {
+            { .dst = lb->datalo_reg, .dst_type = TCG_TYPE_I32,
+              .src = TCG_REG_R0, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
+            { .dst = lb->datahi_reg, .dst_type = TCG_TYPE_I32,
+              .src = TCG_REG_R1, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
+        };
+        tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
     } else {
-        tcg_out_movext(s, TCG_TYPE_I32, datalo,
+        tcg_out_movext(s, TCG_TYPE_I32, lb->datalo_reg,
                        TCG_TYPE_I32, opc & MO_SSIZE, TCG_REG_R0);
     }

@@ -XXX,XX +XXX,XX @@ static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)

     if (TARGET_LONG_BITS == 64) {
         /* 64-bit target address is aligned into R2:R3. */
-        if (l->addrhi_reg != TCG_REG_R2) {
-            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg);
-            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg);
-        } else if (l->addrlo_reg != TCG_REG_R3) {
-            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg);
-            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg);
-        } else {
-            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, TCG_REG_R2);
-            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, TCG_REG_R3);
-            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, TCG_REG_R1);
-        }
+        TCGMovExtend ext[2] = {
+            { .dst = TCG_REG_R2, .dst_type = TCG_TYPE_I32,
+              .src = l->addrlo_reg,
+              .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
+            { .dst = TCG_REG_R3, .dst_type = TCG_TYPE_I32,
+              .src = l->addrhi_reg,
+              .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
+        };
+        tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
     } else {
         tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg);
     }
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 {
     MemOpIdx oi = l->oi;
     MemOp opc = get_memop(oi);
-    TCGReg data_reg;
     tcg_insn_unit **label_ptr = &l->label_ptr[0];

     /* resolve label address */
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)

     tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);

-    data_reg = l->datalo_reg;
     if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
-        if (data_reg == TCG_REG_EDX) {
-            /* xchg %edx, %eax */
-            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
-            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
-        } else {
-            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
-            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
-        }
+        TCGMovExtend ext[2] = {
+            { .dst = l->datalo_reg, .dst_type = TCG_TYPE_I32,
+              .src = TCG_REG_EAX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
+            { .dst = l->datahi_reg, .dst_type = TCG_TYPE_I32,
+              .src = TCG_REG_EDX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
+        };
+        tcg_out_movext2(s, &ext[0], &ext[1], -1);
     } else {
-        tcg_out_movext(s, l->type, data_reg,
+        tcg_out_movext(s, l->type, l->datalo_reg,
                        TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_EAX);
     }

--
2.34.1

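[Not from the series: the move-ordering problem tcg_out_movext2 solves,
simulated on a toy register file with the extension handling elided.
The three branches match the function body above: disjoint moves,
one-way overlap, and a full swap.]

    #include <assert.h>

    static int regs[4];

    static void mov(int dst, int src) { regs[dst] = regs[src]; }

    /* Parallel move {d1 <- s1, d2 <- s2}; @scratch is only needed for
     * the full-swap case when no xchg instruction is available. */
    static void movext2(int d1, int s1, int d2, int s2, int scratch)
    {
        if (d1 != s2) {          /* disjoint: either order works */
            mov(d1, s1);
            mov(d2, s2);
            return;
        }
        if (d2 == s1) {          /* full swap: save one side first */
            mov(scratch, s1);
            s1 = scratch;
        }
        mov(d2, s2);             /* d1 == s2, so write d2 first */
        mov(d1, s1);
    }

    int main(void)
    {
        regs[0] = 10; regs[1] = 20;
        movext2(0, 1, 1, 0, 3);              /* swap r0 and r1 */
        assert(regs[0] == 20 && regs[1] == 10);

        regs[0] = 5; regs[1] = 6; regs[2] = 7;
        movext2(0, 2, 1, 0, 3);              /* r1 needs the old r0 */
        assert(regs[0] == 7 && regs[1] == 5);
        return 0;
    }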