1
The following changes since commit e18e5501d8ac692d32657a3e1ef545b14e72b730:
1
The following changes since commit 7c18f2d663521f1b31b821a13358ce38075eaf7d:
2
2
3
Merge remote-tracking branch 'remotes/dgilbert-gitlab/tags/pull-virtiofs-20200210' into staging (2020-02-10 18:09:14 +0000)
3
Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging (2023-04-29 23:07:17 +0100)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
https://github.com/rth7680/qemu.git tags/pull-tcg-20200212
7
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230502
8
8
9
for you to fetch changes up to 2445971604c1cfd3ec484457159f4ac300fb04d2:
9
for you to fetch changes up to bdc7fba1c5a29ae218b45353daac9308fe1aae82:
10
10
11
tcg: Add tcg_gen_gvec_5_ptr (2020-02-12 14:58:36 -0800)
11
tcg: Introduce tcg_out_movext2 (2023-05-02 12:15:41 +0100)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Fix breakpoint invalidation.
14
Misc tcg-related patch queue.
15
Add support for tcg helpers with 7 arguments.
16
Add support for gvec helpers with 5 arguments.
17
15
18
----------------------------------------------------------------
16
----------------------------------------------------------------
19
Max Filippov (1):
17
Dickon Hood (1):
20
exec: flush CPU TB cache in breakpoint_invalidate
18
qemu/bitops.h: Limit rotate amounts
21
19
22
Richard Henderson (1):
20
Kiran Ostrolenk (1):
23
tcg: Add tcg_gen_gvec_5_ptr
21
qemu/host-utils.h: Add clz and ctz functions for lower-bit integers
24
22
25
Taylor Simpson (1):
23
Nazar Kazakov (2):
26
tcg: Add support for a helper with 7 arguments
24
tcg: Add tcg_gen_gvec_andcs
25
tcg: Add tcg_gen_gvec_rotrs
27
26
28
include/exec/helper-gen.h | 13 +++++++++++++
27
Richard Henderson (7):
29
include/exec/helper-head.h | 2 ++
28
softmmu: Tidy dirtylimit_dirty_ring_full_time
30
include/exec/helper-proto.h | 6 ++++++
29
qemu/int128: Re-shuffle Int128Alias members
31
include/exec/helper-tcg.h | 7 +++++++
30
migration/xbzrle: Use __attribute__((target)) for avx512
32
include/tcg/tcg-op-gvec.h | 7 +++++++
31
accel/tcg: Add cpu_ld*_code_mmu
33
exec.c | 15 +++++++--------
32
tcg/loongarch64: Conditionalize tcg_out_exts_i32_i64
34
tcg/tcg-op-gvec.c | 32 ++++++++++++++++++++++++++++++++
33
tcg/mips: Conditionalize tcg_out_exts_i32_i64
35
7 files changed, 74 insertions(+), 8 deletions(-)
34
tcg: Introduce tcg_out_movext2
36
35
36
Weiwei Li (1):
37
accel/tcg: Uncache the host address for instruction fetch when tlb size < 1
38
39
meson.build | 5 +--
40
accel/tcg/tcg-runtime.h | 1 +
41
include/exec/cpu_ldst.h | 9 ++++++
42
include/qemu/bitops.h | 24 +++++++++-----
43
include/qemu/host-utils.h | 54 +++++++++++++++++++++++++++++++
44
include/qemu/int128.h | 4 +--
45
include/tcg/tcg-op-gvec.h | 4 +++
46
accel/tcg/cputlb.c | 53 ++++++++++++++++++++++++++++++
47
accel/tcg/tcg-runtime-gvec.c | 11 +++++++
48
accel/tcg/user-exec.c | 58 +++++++++++++++++++++++++++++++++
49
migration/xbzrle.c | 9 +++---
50
softmmu/dirtylimit.c | 15 ++++++---
51
tcg/tcg-op-gvec.c | 28 ++++++++++++++++
52
tcg/tcg.c | 69 +++++++++++++++++++++++++++++++++++++---
53
tcg/arm/tcg-target.c.inc | 44 +++++++++++--------------
54
tcg/i386/tcg-target.c.inc | 19 +++++------
55
tcg/loongarch64/tcg-target.c.inc | 4 ++-
56
tcg/mips/tcg-target.c.inc | 4 ++-
57
18 files changed, 347 insertions(+), 68 deletions(-)
diff view generated by jsdifflib
New patch
1
Drop inline marker: let compiler decide.
1
2
3
Change return type to uint64_t: this matches the computation in the
4
return statement and the local variable assignment in the caller.
5
6
Rename local to dirty_ring_size_MB to fix typo.
7
Simplify conversion to MiB via qemu_target_page_bits and right shift.
8
9
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Reviewed-by: Thomas Huth <thuth@redhat.com>
11
Reviewed-by: Juan Quintela <quintela@redhat.com>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
14
softmmu/dirtylimit.c | 15 ++++++++++-----
15
1 file changed, 10 insertions(+), 5 deletions(-)
16
17
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/softmmu/dirtylimit.c
20
+++ b/softmmu/dirtylimit.c
21
@@ -XXX,XX +XXX,XX @@ bool dirtylimit_vcpu_index_valid(int cpu_index)
22
cpu_index >= ms->smp.max_cpus);
23
}
24
25
-static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
26
+static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
27
{
28
static uint64_t max_dirtyrate;
29
- uint32_t dirty_ring_size = kvm_dirty_ring_size();
30
- uint64_t dirty_ring_size_meory_MB =
31
- dirty_ring_size * qemu_target_page_size() >> 20;
32
+ unsigned target_page_bits = qemu_target_page_bits();
33
+ uint64_t dirty_ring_size_MB;
34
+
35
+ /* So far, the largest (non-huge) page size is 64k, i.e. 16 bits. */
36
+ assert(target_page_bits < 20);
37
+
38
+ /* Convert ring size (pages) to MiB (2**20). */
39
+ dirty_ring_size_MB = kvm_dirty_ring_size() >> (20 - target_page_bits);
40
41
if (max_dirtyrate < dirtyrate) {
42
max_dirtyrate = dirtyrate;
43
}
44
45
- return dirty_ring_size_meory_MB * 1000000 / max_dirtyrate;
46
+ return dirty_ring_size_MB * 1000000 / max_dirtyrate;
47
}
48
49
static inline bool dirtylimit_done(uint64_t quota,
50
--
51
2.34.1
52
53
diff view generated by jsdifflib
New patch
1
From: Weiwei Li <liweiwei@iscas.ac.cn>
1
2
3
When PMP entry overlap part of the page, we'll set the tlb_size to 1, which
4
will make the address in tlb entry set with TLB_INVALID_MASK, and the next
5
access will again go through tlb_fill.However, this way will not work in
6
tb_gen_code() => get_page_addr_code_hostp(): the TLB host address will be
7
cached, and the following instructions can use this host address directly
8
which may lead to the bypass of PMP related check.
9
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1542.
10
11
Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
12
Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
13
Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
14
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
15
Message-Id: <20230422130329.23555-6-liweiwei@iscas.ac.cn>
16
---
17
accel/tcg/cputlb.c | 5 +++++
18
1 file changed, 5 insertions(+)
19
20
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/accel/tcg/cputlb.c
23
+++ b/accel/tcg/cputlb.c
24
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
25
if (p == NULL) {
26
return -1;
27
}
28
+
29
+ if (full->lg_page_size < TARGET_PAGE_BITS) {
30
+ return -1;
31
+ }
32
+
33
if (hostp) {
34
*hostp = p;
35
}
36
--
37
2.34.1
diff view generated by jsdifflib
New patch
1
From: Dickon Hood <dickon.hood@codethink.co.uk>
1
2
3
Rotates have been fixed up to only allow for reasonable rotate amounts
4
(ie, no rotates >7 on an 8b value etc.) This fixes a problem with riscv
5
vector rotate instructions.
6
7
Signed-off-by: Dickon Hood <dickon.hood@codethink.co.uk>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-Id: <20230428144757.57530-9-lawrence.hunter@codethink.co.uk>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
include/qemu/bitops.h | 24 ++++++++++++++++--------
13
1 file changed, 16 insertions(+), 8 deletions(-)
14
15
diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/qemu/bitops.h
18
+++ b/include/qemu/bitops.h
19
@@ -XXX,XX +XXX,XX @@ static inline unsigned long find_first_zero_bit(const unsigned long *addr,
20
*/
21
static inline uint8_t rol8(uint8_t word, unsigned int shift)
22
{
23
- return (word << shift) | (word >> ((8 - shift) & 7));
24
+ shift &= 7;
25
+ return (word << shift) | (word >> (8 - shift));
26
}
27
28
/**
29
@@ -XXX,XX +XXX,XX @@ static inline uint8_t rol8(uint8_t word, unsigned int shift)
30
*/
31
static inline uint8_t ror8(uint8_t word, unsigned int shift)
32
{
33
- return (word >> shift) | (word << ((8 - shift) & 7));
34
+ shift &= 7;
35
+ return (word >> shift) | (word << (8 - shift));
36
}
37
38
/**
39
@@ -XXX,XX +XXX,XX @@ static inline uint8_t ror8(uint8_t word, unsigned int shift)
40
*/
41
static inline uint16_t rol16(uint16_t word, unsigned int shift)
42
{
43
- return (word << shift) | (word >> ((16 - shift) & 15));
44
+ shift &= 15;
45
+ return (word << shift) | (word >> (16 - shift));
46
}
47
48
/**
49
@@ -XXX,XX +XXX,XX @@ static inline uint16_t rol16(uint16_t word, unsigned int shift)
50
*/
51
static inline uint16_t ror16(uint16_t word, unsigned int shift)
52
{
53
- return (word >> shift) | (word << ((16 - shift) & 15));
54
+ shift &= 15;
55
+ return (word >> shift) | (word << (16 - shift));
56
}
57
58
/**
59
@@ -XXX,XX +XXX,XX @@ static inline uint16_t ror16(uint16_t word, unsigned int shift)
60
*/
61
static inline uint32_t rol32(uint32_t word, unsigned int shift)
62
{
63
- return (word << shift) | (word >> ((32 - shift) & 31));
64
+ shift &= 31;
65
+ return (word << shift) | (word >> (32 - shift));
66
}
67
68
/**
69
@@ -XXX,XX +XXX,XX @@ static inline uint32_t rol32(uint32_t word, unsigned int shift)
70
*/
71
static inline uint32_t ror32(uint32_t word, unsigned int shift)
72
{
73
- return (word >> shift) | (word << ((32 - shift) & 31));
74
+ shift &= 31;
75
+ return (word >> shift) | (word << (32 - shift));
76
}
77
78
/**
79
@@ -XXX,XX +XXX,XX @@ static inline uint32_t ror32(uint32_t word, unsigned int shift)
80
*/
81
static inline uint64_t rol64(uint64_t word, unsigned int shift)
82
{
83
- return (word << shift) | (word >> ((64 - shift) & 63));
84
+ shift &= 63;
85
+ return (word << shift) | (word >> (64 - shift));
86
}
87
88
/**
89
@@ -XXX,XX +XXX,XX @@ static inline uint64_t rol64(uint64_t word, unsigned int shift)
90
*/
91
static inline uint64_t ror64(uint64_t word, unsigned int shift)
92
{
93
- return (word >> shift) | (word << ((64 - shift) & 63));
94
+ shift &= 63;
95
+ return (word >> shift) | (word << (64 - shift));
96
}
97
98
/**
99
--
100
2.34.1
diff view generated by jsdifflib
New patch
1
From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
1
2
3
This is for use in the RISC-V vclz and vctz instructions (implemented in
4
proceeding commit).
5
6
Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-Id: <20230428144757.57530-11-lawrence.hunter@codethink.co.uk>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
include/qemu/host-utils.h | 54 +++++++++++++++++++++++++++++++++++++++
12
1 file changed, 54 insertions(+)
13
14
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/include/qemu/host-utils.h
17
+++ b/include/qemu/host-utils.h
18
@@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
19
}
20
#endif
21
22
+/**
23
+ * clz8 - count leading zeros in a 8-bit value.
24
+ * @val: The value to search
25
+ *
26
+ * Returns 8 if the value is zero. Note that the GCC builtin is
27
+ * undefined if the value is zero.
28
+ *
29
+ * Note that the GCC builtin will upcast its argument to an `unsigned int`
30
+ * so this function subtracts off the number of prepended zeroes.
31
+ */
32
+static inline int clz8(uint8_t val)
33
+{
34
+ return val ? __builtin_clz(val) - 24 : 8;
35
+}
36
+
37
+/**
38
+ * clz16 - count leading zeros in a 16-bit value.
39
+ * @val: The value to search
40
+ *
41
+ * Returns 16 if the value is zero. Note that the GCC builtin is
42
+ * undefined if the value is zero.
43
+ *
44
+ * Note that the GCC builtin will upcast its argument to an `unsigned int`
45
+ * so this function subtracts off the number of prepended zeroes.
46
+ */
47
+static inline int clz16(uint16_t val)
48
+{
49
+ return val ? __builtin_clz(val) - 16 : 16;
50
+}
51
+
52
/**
53
* clz32 - count leading zeros in a 32-bit value.
54
* @val: The value to search
55
@@ -XXX,XX +XXX,XX @@ static inline int clo64(uint64_t val)
56
return clz64(~val);
57
}
58
59
+/**
60
+ * ctz8 - count trailing zeros in a 8-bit value.
61
+ * @val: The value to search
62
+ *
63
+ * Returns 8 if the value is zero. Note that the GCC builtin is
64
+ * undefined if the value is zero.
65
+ */
66
+static inline int ctz8(uint8_t val)
67
+{
68
+ return val ? __builtin_ctz(val) : 8;
69
+}
70
+
71
+/**
72
+ * ctz16 - count trailing zeros in a 16-bit value.
73
+ * @val: The value to search
74
+ *
75
+ * Returns 16 if the value is zero. Note that the GCC builtin is
76
+ * undefined if the value is zero.
77
+ */
78
+static inline int ctz16(uint16_t val)
79
+{
80
+ return val ? __builtin_ctz(val) : 16;
81
+}
82
+
83
/**
84
* ctz32 - count trailing zeros in a 32-bit value.
85
* @val: The value to search
86
--
87
2.34.1
diff view generated by jsdifflib
New patch
1
From: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
1
2
3
Add tcg expander and helper functions for and-compliment
4
vector with scalar operand.
5
6
Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
7
Message-Id: <20230428144757.57530-10-lawrence.hunter@codethink.co.uk>
8
[rth: Split out of larger patch.]
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
accel/tcg/tcg-runtime.h | 1 +
12
include/tcg/tcg-op-gvec.h | 2 ++
13
accel/tcg/tcg-runtime-gvec.c | 11 +++++++++++
14
tcg/tcg-op-gvec.c | 17 +++++++++++++++++
15
4 files changed, 31 insertions(+)
16
17
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/accel/tcg/tcg-runtime.h
20
+++ b/accel/tcg/tcg-runtime.h
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_nor, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
22
DEF_HELPER_FLAGS_4(gvec_eqv, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
23
24
DEF_HELPER_FLAGS_4(gvec_ands, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
25
+DEF_HELPER_FLAGS_4(gvec_andcs, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
26
DEF_HELPER_FLAGS_4(gvec_xors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
27
DEF_HELPER_FLAGS_4(gvec_ors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
28
29
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
30
index XXXXXXX..XXXXXXX 100644
31
--- a/include/tcg/tcg-op-gvec.h
32
+++ b/include/tcg/tcg-op-gvec.h
33
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_ori(unsigned vece, uint32_t dofs, uint32_t aofs,
34
35
void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs,
36
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
37
+void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs,
38
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
39
void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs,
40
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
41
void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,
42
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/accel/tcg/tcg-runtime-gvec.c
45
+++ b/accel/tcg/tcg-runtime-gvec.c
46
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
47
clear_high(d, oprsz, desc);
48
}
49
50
+void HELPER(gvec_andcs)(void *d, void *a, uint64_t b, uint32_t desc)
51
+{
52
+ intptr_t oprsz = simd_oprsz(desc);
53
+ intptr_t i;
54
+
55
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
56
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & ~b;
57
+ }
58
+ clear_high(d, oprsz, desc);
59
+}
60
+
61
void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
62
{
63
intptr_t oprsz = simd_oprsz(desc);
64
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
65
index XXXXXXX..XXXXXXX 100644
66
--- a/tcg/tcg-op-gvec.c
67
+++ b/tcg/tcg-op-gvec.c
68
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_andi(unsigned vece, uint32_t dofs, uint32_t aofs,
69
tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ands);
70
}
71
72
+void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs,
73
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
74
+{
75
+ static GVecGen2s g = {
76
+ .fni8 = tcg_gen_andc_i64,
77
+ .fniv = tcg_gen_andc_vec,
78
+ .fno = gen_helper_gvec_andcs,
79
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
80
+ .vece = MO_64
81
+ };
82
+
83
+ TCGv_i64 tmp = tcg_temp_ebb_new_i64();
84
+ tcg_gen_dup_i64(vece, tmp, c);
85
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &g);
86
+ tcg_temp_free_i64(tmp);
87
+}
88
+
89
static const GVecGen2s gop_xors = {
90
.fni8 = tcg_gen_xor_i64,
91
.fniv = tcg_gen_xor_vec,
92
--
93
2.34.1
diff view generated by jsdifflib
1
Extend the vector generator infrastructure to handle
1
From: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
2
5 vector arguments.
3
2
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
3
Add tcg expander and helper functions for rotate right
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
vector with scalar operand.
6
Reviewed-by: Taylor Simpson <tsimpson@quicinc.com>
5
6
Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
7
Message-Id: <20230428144757.57530-10-lawrence.hunter@codethink.co.uk>
8
[rth: Split out of larger patch; mask rotation count.]
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
10
---
9
include/tcg/tcg-op-gvec.h | 7 +++++++
11
include/tcg/tcg-op-gvec.h | 2 ++
10
tcg/tcg-op-gvec.c | 32 ++++++++++++++++++++++++++++++++
12
tcg/tcg-op-gvec.c | 11 +++++++++++
11
2 files changed, 39 insertions(+)
13
2 files changed, 13 insertions(+)
12
14
13
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
15
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
14
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
15
--- a/include/tcg/tcg-op-gvec.h
17
--- a/include/tcg/tcg-op-gvec.h
16
+++ b/include/tcg/tcg-op-gvec.h
18
+++ b/include/tcg/tcg-op-gvec.h
17
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
19
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs,
18
uint32_t maxsz, int32_t data,
20
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
19
gen_helper_gvec_4_ptr *fn);
21
void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
20
22
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
21
+typedef void gen_helper_gvec_5_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr,
23
+void tcg_gen_gvec_rotrs(unsigned vece, uint32_t dofs, uint32_t aofs,
22
+ TCGv_ptr, TCGv_ptr, TCGv_i32);
24
+ TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
23
+void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
25
24
+ uint32_t cofs, uint32_t eofs, TCGv_ptr ptr,
26
/*
25
+ uint32_t oprsz, uint32_t maxsz, int32_t data,
27
* Perform vector shift by vector element, modulo the element size.
26
+ gen_helper_gvec_5_ptr *fn);
27
+
28
/* Expand a gvec operation. Either inline or out-of-line depending on
29
the actual vector size and the operations supported by the host. */
30
typedef struct {
31
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
28
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
32
index XXXXXXX..XXXXXXX 100644
29
index XXXXXXX..XXXXXXX 100644
33
--- a/tcg/tcg-op-gvec.c
30
--- a/tcg/tcg-op-gvec.c
34
+++ b/tcg/tcg-op-gvec.c
31
+++ b/tcg/tcg-op-gvec.c
35
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
32
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
36
tcg_temp_free_i32(desc);
33
do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
37
}
34
}
38
35
39
+/* Generate a call to a gvec-style helper with five vector operands
36
+void tcg_gen_gvec_rotrs(unsigned vece, uint32_t dofs, uint32_t aofs,
40
+ and an extra pointer operand. */
37
+ TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz)
41
+void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
42
+ uint32_t cofs, uint32_t eofs, TCGv_ptr ptr,
43
+ uint32_t oprsz, uint32_t maxsz, int32_t data,
44
+ gen_helper_gvec_5_ptr *fn)
45
+{
38
+{
46
+ TCGv_ptr a0, a1, a2, a3, a4;
39
+ TCGv_i32 tmp = tcg_temp_ebb_new_i32();
47
+ TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, data));
48
+
40
+
49
+ a0 = tcg_temp_new_ptr();
41
+ tcg_gen_neg_i32(tmp, shift);
50
+ a1 = tcg_temp_new_ptr();
42
+ tcg_gen_andi_i32(tmp, tmp, (8 << vece) - 1);
51
+ a2 = tcg_temp_new_ptr();
43
+ tcg_gen_gvec_rotls(vece, dofs, aofs, tmp, oprsz, maxsz);
52
+ a3 = tcg_temp_new_ptr();
44
+ tcg_temp_free_i32(tmp);
53
+ a4 = tcg_temp_new_ptr();
54
+
55
+ tcg_gen_addi_ptr(a0, cpu_env, dofs);
56
+ tcg_gen_addi_ptr(a1, cpu_env, aofs);
57
+ tcg_gen_addi_ptr(a2, cpu_env, bofs);
58
+ tcg_gen_addi_ptr(a3, cpu_env, cofs);
59
+ tcg_gen_addi_ptr(a4, cpu_env, eofs);
60
+
61
+ fn(a0, a1, a2, a3, a4, ptr, desc);
62
+
63
+ tcg_temp_free_ptr(a0);
64
+ tcg_temp_free_ptr(a1);
65
+ tcg_temp_free_ptr(a2);
66
+ tcg_temp_free_ptr(a3);
67
+ tcg_temp_free_ptr(a4);
68
+ tcg_temp_free_i32(desc);
69
+}
45
+}
70
+
46
+
71
/* Return true if we want to implement something of OPRSZ bytes
47
/*
72
in units of LNSZ. This limits the expansion of inline code. */
48
* Expand D = A << (B % element bits)
73
static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz)
49
*
74
--
50
--
75
2.20.1
51
2.34.1
76
77
diff view generated by jsdifflib
New patch
1
Clang 14, with --enable-tcg-interpreter errors with
1
2
3
include/qemu/int128.h:487:16: error: alignment of field 'i' (128 bits)
4
does not match the alignment of the first field in transparent union;
5
transparent_union attribute ignored [-Werror,-Wignored-attributes]
6
__int128_t i;
7
^
8
include/qemu/int128.h:486:12: note: alignment of first field is 64 bits
9
Int128 s;
10
^
11
1 error generated.
12
13
By placing the __uint128_t member first, this is avoided.
14
15
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
16
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
17
Message-Id: <20230501204625.277361-1-richard.henderson@linaro.org>
18
---
19
include/qemu/int128.h | 4 ++--
20
1 file changed, 2 insertions(+), 2 deletions(-)
21
22
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/include/qemu/int128.h
25
+++ b/include/qemu/int128.h
26
@@ -XXX,XX +XXX,XX @@ static inline void bswap128s(Int128 *s)
27
*/
28
#ifdef CONFIG_INT128
29
typedef union {
30
- Int128 s;
31
- __int128_t i;
32
__uint128_t u;
33
+ __int128_t i;
34
+ Int128 s;
35
} Int128Alias __attribute__((transparent_union));
36
#else
37
typedef Int128 Int128Alias;
38
--
39
2.34.1
40
41
diff view generated by jsdifflib
New patch
1
Use the attribute, which is supported by clang, instead of
2
the #pragma, which is not supported and, for some reason,
3
also not detected by the meson probe, so we fail by -Werror.
1
4
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Reviewed-by: Juan Quintela <quintela@redhat.com>
7
Message-Id: <20230501210555.289806-1-richard.henderson@linaro.org>
8
---
9
meson.build | 5 +----
10
migration/xbzrle.c | 9 ++++-----
11
2 files changed, 5 insertions(+), 9 deletions(-)
12
13
diff --git a/meson.build b/meson.build
14
index XXXXXXX..XXXXXXX 100644
15
--- a/meson.build
16
+++ b/meson.build
17
@@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \
18
config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \
19
.require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512BW') \
20
.require(cc.links('''
21
- #pragma GCC push_options
22
- #pragma GCC target("avx512bw")
23
#include <cpuid.h>
24
#include <immintrin.h>
25
- static int bar(void *a) {
26
-
27
+ static int __attribute__((target("avx512bw"))) bar(void *a) {
28
__m512i *x = a;
29
__m512i res= _mm512_abs_epi8(*x);
30
return res[1];
31
diff --git a/migration/xbzrle.c b/migration/xbzrle.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/migration/xbzrle.c
34
+++ b/migration/xbzrle.c
35
@@ -XXX,XX +XXX,XX @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
36
}
37
38
#if defined(CONFIG_AVX512BW_OPT)
39
-#pragma GCC push_options
40
-#pragma GCC target("avx512bw")
41
#include <immintrin.h>
42
-int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
43
- uint8_t *dst, int dlen)
44
+
45
+int __attribute__((target("avx512bw")))
46
+xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
47
+ uint8_t *dst, int dlen)
48
{
49
uint32_t zrun_len = 0, nzrun_len = 0;
50
int d = 0, i = 0, num = 0;
51
@@ -XXX,XX +XXX,XX @@ int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
52
}
53
return d;
54
}
55
-#pragma GCC pop_options
56
#endif
57
--
58
2.34.1
diff view generated by jsdifflib
New patch
1
At least RISC-V has the need to be able to perform a read
2
using execute permissions, outside of translation.
3
Add helpers to facilitate this.
1
4
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Acked-by: Alistair Francis <alistair.francis@wdc.com>
7
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
8
Tested-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
9
Message-Id: <20230325105429.1142530-9-richard.henderson@linaro.org>
10
Message-Id: <20230412114333.118895-9-richard.henderson@linaro.org>
11
---
12
include/exec/cpu_ldst.h | 9 +++++++
13
accel/tcg/cputlb.c | 48 ++++++++++++++++++++++++++++++++++
14
accel/tcg/user-exec.c | 58 +++++++++++++++++++++++++++++++++++++++++
15
3 files changed, 115 insertions(+)
16
17
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/include/exec/cpu_ldst.h
20
+++ b/include/exec/cpu_ldst.h
21
@@ -XXX,XX +XXX,XX @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
22
# define cpu_stq_mmu cpu_stq_le_mmu
23
#endif
24
25
+uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
26
+ MemOpIdx oi, uintptr_t ra);
27
+uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
28
+ MemOpIdx oi, uintptr_t ra);
29
+uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
30
+ MemOpIdx oi, uintptr_t ra);
31
+uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
32
+ MemOpIdx oi, uintptr_t ra);
33
+
34
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr);
35
uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr);
36
uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr);
37
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/accel/tcg/cputlb.c
40
+++ b/accel/tcg/cputlb.c
41
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
42
MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true));
43
return full_ldq_code(env, addr, oi, 0);
44
}
45
+
46
+uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
47
+ MemOpIdx oi, uintptr_t retaddr)
48
+{
49
+ return full_ldub_code(env, addr, oi, retaddr);
50
+}
51
+
52
+uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
53
+ MemOpIdx oi, uintptr_t retaddr)
54
+{
55
+ MemOp mop = get_memop(oi);
56
+ int idx = get_mmuidx(oi);
57
+ uint16_t ret;
58
+
59
+ ret = full_lduw_code(env, addr, make_memop_idx(MO_TEUW, idx), retaddr);
60
+ if ((mop & MO_BSWAP) != MO_TE) {
61
+ ret = bswap16(ret);
62
+ }
63
+ return ret;
64
+}
65
+
66
+uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
67
+ MemOpIdx oi, uintptr_t retaddr)
68
+{
69
+ MemOp mop = get_memop(oi);
70
+ int idx = get_mmuidx(oi);
71
+ uint32_t ret;
72
+
73
+ ret = full_ldl_code(env, addr, make_memop_idx(MO_TEUL, idx), retaddr);
74
+ if ((mop & MO_BSWAP) != MO_TE) {
75
+ ret = bswap32(ret);
76
+ }
77
+ return ret;
78
+}
79
+
80
+uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
81
+ MemOpIdx oi, uintptr_t retaddr)
82
+{
83
+ MemOp mop = get_memop(oi);
84
+ int idx = get_mmuidx(oi);
85
+ uint64_t ret;
86
+
87
+ ret = full_ldq_code(env, addr, make_memop_idx(MO_TEUQ, idx), retaddr);
88
+ if ((mop & MO_BSWAP) != MO_TE) {
89
+ ret = bswap64(ret);
90
+ }
91
+ return ret;
92
+}
93
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
94
index XXXXXXX..XXXXXXX 100644
95
--- a/accel/tcg/user-exec.c
96
+++ b/accel/tcg/user-exec.c
97
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr)
98
return ret;
99
}
100
101
+uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
102
+ MemOpIdx oi, uintptr_t ra)
103
+{
104
+ void *haddr;
105
+ uint8_t ret;
106
+
107
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
108
+ ret = ldub_p(haddr);
109
+ clear_helper_retaddr();
110
+ return ret;
111
+}
112
+
113
+uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
114
+ MemOpIdx oi, uintptr_t ra)
115
+{
116
+ void *haddr;
117
+ uint16_t ret;
118
+
119
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
120
+ ret = lduw_p(haddr);
121
+ clear_helper_retaddr();
122
+ if (get_memop(oi) & MO_BSWAP) {
123
+ ret = bswap16(ret);
124
+ }
125
+ return ret;
126
+}
127
+
128
+uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
129
+ MemOpIdx oi, uintptr_t ra)
130
+{
131
+ void *haddr;
132
+ uint32_t ret;
133
+
134
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
135
+ ret = ldl_p(haddr);
136
+ clear_helper_retaddr();
137
+ if (get_memop(oi) & MO_BSWAP) {
138
+ ret = bswap32(ret);
139
+ }
140
+ return ret;
141
+}
142
+
143
+uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
144
+ MemOpIdx oi, uintptr_t ra)
145
+{
146
+ void *haddr;
147
+ uint64_t ret;
148
+
149
+ validate_memop(oi, MO_BEUQ);
150
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
151
+ ret = ldq_p(haddr);
152
+ clear_helper_retaddr();
153
+ if (get_memop(oi) & MO_BSWAP) {
154
+ ret = bswap64(ret);
155
+ }
156
+ return ret;
157
+}
158
+
159
#include "ldst_common.c.inc"
160
161
/*
162
--
163
2.34.1
diff view generated by jsdifflib
1
From: Taylor Simpson <tsimpson@quicinc.com>
1
Since TCG_TYPE_I32 values are kept sign-extended in registers,
2
via ".w" instructions, we need not extend if the register matches.
3
This is already relied upon by comparisons.
2
4
3
Currently, helpers can only take up to 6 arguments. This patch adds the
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
capability for up to 7 arguments. I have tested it with the Hexagon port
5
that I am preparing for submission.
6
7
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
8
Message-Id: <1580942510-2820-1-git-send-email-tsimpson@quicinc.com>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
7
---
11
include/exec/helper-gen.h | 13 +++++++++++++
8
tcg/loongarch64/tcg-target.c.inc | 4 +++-
12
include/exec/helper-head.h | 2 ++
9
1 file changed, 3 insertions(+), 1 deletion(-)
13
include/exec/helper-proto.h | 6 ++++++
14
include/exec/helper-tcg.h | 7 +++++++
15
4 files changed, 28 insertions(+)
16
10
17
diff --git a/include/exec/helper-gen.h b/include/exec/helper-gen.h
11
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
18
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
19
--- a/include/exec/helper-gen.h
13
--- a/tcg/loongarch64/tcg-target.c.inc
20
+++ b/include/exec/helper-gen.h
14
+++ b/tcg/loongarch64/tcg-target.c.inc
21
@@ -XXX,XX +XXX,XX @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg)
22
tcg_gen_callN(HELPER(name), dh_retvar(ret), 6, args); \
16
17
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
18
{
19
- tcg_out_ext32s(s, ret, arg);
20
+ if (ret != arg) {
21
+ tcg_out_ext32s(s, ret, arg);
22
+ }
23
}
23
}
24
24
25
+#define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7)\
25
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
26
+static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \
27
+ dh_arg_decl(t1, 1), dh_arg_decl(t2, 2), dh_arg_decl(t3, 3), \
28
+ dh_arg_decl(t4, 4), dh_arg_decl(t5, 5), dh_arg_decl(t6, 6), \
29
+ dh_arg_decl(t7, 7)) \
30
+{ \
31
+ TCGTemp *args[7] = { dh_arg(t1, 1), dh_arg(t2, 2), dh_arg(t3, 3), \
32
+ dh_arg(t4, 4), dh_arg(t5, 5), dh_arg(t6, 6), \
33
+ dh_arg(t7, 7) }; \
34
+ tcg_gen_callN(HELPER(name), dh_retvar(ret), 7, args); \
35
+}
36
+
37
#include "helper.h"
38
#include "trace/generated-helpers.h"
39
#include "trace/generated-helpers-wrappers.h"
40
@@ -XXX,XX +XXX,XX @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \
41
#undef DEF_HELPER_FLAGS_4
42
#undef DEF_HELPER_FLAGS_5
43
#undef DEF_HELPER_FLAGS_6
44
+#undef DEF_HELPER_FLAGS_7
45
#undef GEN_HELPER
46
47
#endif /* HELPER_GEN_H */
48
diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h
49
index XXXXXXX..XXXXXXX 100644
50
--- a/include/exec/helper-head.h
51
+++ b/include/exec/helper-head.h
52
@@ -XXX,XX +XXX,XX @@
53
DEF_HELPER_FLAGS_5(name, 0, ret, t1, t2, t3, t4, t5)
54
#define DEF_HELPER_6(name, ret, t1, t2, t3, t4, t5, t6) \
55
DEF_HELPER_FLAGS_6(name, 0, ret, t1, t2, t3, t4, t5, t6)
56
+#define DEF_HELPER_7(name, ret, t1, t2, t3, t4, t5, t6, t7) \
57
+ DEF_HELPER_FLAGS_7(name, 0, ret, t1, t2, t3, t4, t5, t6, t7)
58
59
/* MAX_OPC_PARAM_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */
60
61
diff --git a/include/exec/helper-proto.h b/include/exec/helper-proto.h
62
index XXXXXXX..XXXXXXX 100644
63
--- a/include/exec/helper-proto.h
64
+++ b/include/exec/helper-proto.h
65
@@ -XXX,XX +XXX,XX @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
66
dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
67
dh_ctype(t4), dh_ctype(t5), dh_ctype(t6));
68
69
+#define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7) \
70
+dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
71
+ dh_ctype(t4), dh_ctype(t5), dh_ctype(t6), \
72
+ dh_ctype(t7));
73
+
74
#include "helper.h"
75
#include "trace/generated-helpers.h"
76
#include "tcg-runtime.h"
77
@@ -XXX,XX +XXX,XX @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
78
#undef DEF_HELPER_FLAGS_4
79
#undef DEF_HELPER_FLAGS_5
80
#undef DEF_HELPER_FLAGS_6
81
+#undef DEF_HELPER_FLAGS_7
82
83
#endif /* HELPER_PROTO_H */
84
diff --git a/include/exec/helper-tcg.h b/include/exec/helper-tcg.h
85
index XXXXXXX..XXXXXXX 100644
86
--- a/include/exec/helper-tcg.h
87
+++ b/include/exec/helper-tcg.h
88
@@ -XXX,XX +XXX,XX @@
89
| dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \
90
| dh_sizemask(t5, 5) | dh_sizemask(t6, 6) },
91
92
+#define DEF_HELPER_FLAGS_7(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6, t7) \
93
+ { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \
94
+ .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \
95
+ | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \
96
+ | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) | dh_sizemask(t7, 7) },
97
+
98
#include "helper.h"
99
#include "trace/generated-helpers.h"
100
#include "tcg-runtime.h"
101
@@ -XXX,XX +XXX,XX @@
102
#undef DEF_HELPER_FLAGS_4
103
#undef DEF_HELPER_FLAGS_5
104
#undef DEF_HELPER_FLAGS_6
105
+#undef DEF_HELPER_FLAGS_7
106
107
#endif /* HELPER_TCG_H */
108
--
26
--
109
2.20.1
27
2.34.1
110
28
111
29
diff view generated by jsdifflib
1
From: Max Filippov <jcmvbkbc@gmail.com>
1
Since TCG_TYPE_I32 values are kept sign-extended in registers, we need not
2
extend if the register matches. This is already relied upon by comparisons.
2
3
3
When a breakpoint is inserted at location for which there's currently no
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
virtual to physical translation no action is taken on CPU TB cache. If a
5
TB for that virtual address already exists but is not visible ATM the
6
breakpoint won't be hit next time an instruction at that address will be
7
executed.
8
9
Flush entire CPU TB cache in breakpoint_invalidate to force
10
re-translation of all TBs for the breakpoint address.
11
12
This change fixes the following scenario:
13
- linux user application is running
14
- a breakpoint is inserted from QEMU gdbstub for a user address that is
15
not currently present in the target CPU TLB
16
- an instruction at that address is executed, but the external debugger
17
doesn't get control.
18
19
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
20
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
21
Message-Id: <20191127220602.10827-2-jcmvbkbc@gmail.com>
22
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
23
---
6
---
24
exec.c | 15 +++++++--------
7
tcg/mips/tcg-target.c.inc | 4 +++-
25
1 file changed, 7 insertions(+), 8 deletions(-)
8
1 file changed, 3 insertions(+), 1 deletion(-)
26
9
27
diff --git a/exec.c b/exec.c
10
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
28
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
29
--- a/exec.c
12
--- a/tcg/mips/tcg-target.c.inc
30
+++ b/exec.c
13
+++ b/tcg/mips/tcg-target.c.inc
31
@@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs)
14
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rs)
32
15
33
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
16
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs)
34
{
17
{
35
- MemTxAttrs attrs;
18
- tcg_out_ext32s(s, rd, rs);
36
- hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
19
+ if (rd != rs) {
37
- int asidx = cpu_asidx_from_attrs(cpu, attrs);
20
+ tcg_out_ext32s(s, rd, rs);
38
- if (phys != -1) {
21
+ }
39
- /* Locks grabbed by tb_invalidate_phys_addr */
40
- tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
41
- phys | (pc & ~TARGET_PAGE_MASK), attrs);
42
- }
43
+ /*
44
+ * There may not be a virtual to physical translation for the pc
45
+ * right now, but there may exist cached TB for this pc.
46
+ * Flush the whole TB cache to force re-translation of such TBs.
47
+ * This is heavyweight, but we're debugging anyway.
48
+ */
49
+ tb_flush(cpu);
50
}
22
}
51
#endif
23
52
24
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs)
53
--
25
--
54
2.20.1
26
2.34.1
55
27
56
28
diff view generated by jsdifflib
New patch
1
1
This is common code in most qemu_{ld,st} slow paths, moving two
2
registers when there may be overlap between sources and destinations.
3
At present, this is only used by 32-bit hosts for 64-bit data,
4
but will shortly be used for more than that.
5
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/tcg.c | 69 ++++++++++++++++++++++++++++++++++++---
10
tcg/arm/tcg-target.c.inc | 44 ++++++++++---------------
11
tcg/i386/tcg-target.c.inc | 19 +++++------
12
3 files changed, 90 insertions(+), 42 deletions(-)
13
14
diff --git a/tcg/tcg.c b/tcg/tcg.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/tcg.c
17
+++ b/tcg/tcg.c
18
@@ -XXX,XX +XXX,XX @@ static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
19
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
20
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
21
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
22
-static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
23
- __attribute__((unused));
24
+static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
25
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
26
static void tcg_out_goto_tb(TCGContext *s, int which);
27
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
28
@@ -XXX,XX +XXX,XX @@ void tcg_raise_tb_overflow(TCGContext *s)
29
siglongjmp(s->jmp_trans, -2);
30
}
31
32
+typedef struct TCGMovExtend {
33
+ TCGReg dst;
34
+ TCGReg src;
35
+ TCGType dst_type;
36
+ TCGType src_type;
37
+ MemOp src_ext;
38
+} TCGMovExtend;
39
+
40
/**
41
* tcg_out_movext -- move and extend
42
* @s: tcg context
43
@@ -XXX,XX +XXX,XX @@ void tcg_raise_tb_overflow(TCGContext *s)
44
*
45
* Move or extend @src into @dst, depending on @src_ext and the types.
46
*/
47
-static void __attribute__((unused))
48
-tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
49
- TCGType src_type, MemOp src_ext, TCGReg src)
50
+static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
51
+ TCGType src_type, MemOp src_ext, TCGReg src)
52
{
53
switch (src_ext) {
54
case MO_UB:
55
@@ -XXX,XX +XXX,XX @@ tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
56
}
57
}
58
59
+/* Minor variations on a theme, using a structure. */
60
+static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
61
+ TCGReg src)
62
+{
63
+ tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
64
+}
65
+
66
+static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
67
+{
68
+ tcg_out_movext1_new_src(s, i, i->src);
69
+}
70
+
71
+/**
72
+ * tcg_out_movext2 -- move and extend two pair
73
+ * @s: tcg context
74
+ * @i1: first move description
75
+ * @i2: second move description
76
+ * @scratch: temporary register, or -1 for none
77
+ *
78
+ * As tcg_out_movext, for both @i1 and @i2, caring for overlap
79
+ * between the sources and destinations.
80
+ */
81
+
82
+static void __attribute__((unused))
83
+tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
84
+ const TCGMovExtend *i2, int scratch)
85
+{
86
+ TCGReg src1 = i1->src;
87
+ TCGReg src2 = i2->src;
88
+
89
+ if (i1->dst != src2) {
90
+ tcg_out_movext1(s, i1);
91
+ tcg_out_movext1(s, i2);
92
+ return;
93
+ }
94
+ if (i2->dst == src1) {
95
+ TCGType src1_type = i1->src_type;
96
+ TCGType src2_type = i2->src_type;
97
+
98
+ if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
99
+ /* The data is now in the correct registers, now extend. */
100
+ src1 = i2->src;
101
+ src2 = i1->src;
102
+ } else {
103
+ tcg_debug_assert(scratch >= 0);
104
+ tcg_out_mov(s, src1_type, scratch, src1);
105
+ src1 = scratch;
106
+ }
107
+ }
108
+ tcg_out_movext1_new_src(s, i2, src2);
109
+ tcg_out_movext1_new_src(s, i1, src1);
110
+}
111
+
112
#define C_PFX1(P, A) P##A
113
#define C_PFX2(P, A, B) P##A##_##B
114
#define C_PFX3(P, A, B, C) P##A##_##B##_##C
115
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
116
index XXXXXXX..XXXXXXX 100644
117
--- a/tcg/arm/tcg-target.c.inc
118
+++ b/tcg/arm/tcg-target.c.inc
119
@@ -XXX,XX +XXX,XX @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
120
121
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
122
{
123
- TCGReg argreg, datalo, datahi;
124
+ TCGReg argreg;
125
MemOpIdx oi = lb->oi;
126
MemOp opc = get_memop(oi);
127
128
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
129
/* Use the canonical unsigned helpers and minimize icache usage. */
130
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
131
132
- datalo = lb->datalo_reg;
133
- datahi = lb->datahi_reg;
134
if ((opc & MO_SIZE) == MO_64) {
135
- if (datalo != TCG_REG_R1) {
136
- tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
137
- tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
138
- } else if (datahi != TCG_REG_R0) {
139
- tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
140
- tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
141
- } else {
142
- tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
143
- tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
144
- tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
145
- }
146
+ TCGMovExtend ext[2] = {
147
+ { .dst = lb->datalo_reg, .dst_type = TCG_TYPE_I32,
148
+ .src = TCG_REG_R0, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
149
+ { .dst = lb->datahi_reg, .dst_type = TCG_TYPE_I32,
150
+ .src = TCG_REG_R1, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
151
+ };
152
+ tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
153
} else {
154
- tcg_out_movext(s, TCG_TYPE_I32, datalo,
155
+ tcg_out_movext(s, TCG_TYPE_I32, lb->datalo_reg,
156
TCG_TYPE_I32, opc & MO_SSIZE, TCG_REG_R0);
157
}
158
159
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
160
161
if (TARGET_LONG_BITS == 64) {
162
/* 64-bit target address is aligned into R2:R3. */
163
- if (l->addrhi_reg != TCG_REG_R2) {
164
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg);
165
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg);
166
- } else if (l->addrlo_reg != TCG_REG_R3) {
167
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg);
168
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg);
169
- } else {
170
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, TCG_REG_R2);
171
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, TCG_REG_R3);
172
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, TCG_REG_R1);
173
- }
174
+ TCGMovExtend ext[2] = {
175
+ { .dst = TCG_REG_R2, .dst_type = TCG_TYPE_I32,
176
+ .src = l->addrlo_reg,
177
+ .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
178
+ { .dst = TCG_REG_R3, .dst_type = TCG_TYPE_I32,
179
+ .src = l->addrhi_reg,
180
+ .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
181
+ };
182
+ tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
183
} else {
184
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg);
185
}
186
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
187
index XXXXXXX..XXXXXXX 100644
188
--- a/tcg/i386/tcg-target.c.inc
189
+++ b/tcg/i386/tcg-target.c.inc
190
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
191
{
192
MemOpIdx oi = l->oi;
193
MemOp opc = get_memop(oi);
194
- TCGReg data_reg;
195
tcg_insn_unit **label_ptr = &l->label_ptr[0];
196
197
/* resolve label address */
198
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
199
200
tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
201
202
- data_reg = l->datalo_reg;
203
if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
204
- if (data_reg == TCG_REG_EDX) {
205
- /* xchg %edx, %eax */
206
- tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
207
- tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
208
- } else {
209
- tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
210
- tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
211
- }
212
+ TCGMovExtend ext[2] = {
213
+ { .dst = l->datalo_reg, .dst_type = TCG_TYPE_I32,
214
+ .src = TCG_REG_EAX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
215
+ { .dst = l->datahi_reg, .dst_type = TCG_TYPE_I32,
216
+ .src = TCG_REG_EDX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
217
+ };
218
+ tcg_out_movext2(s, &ext[0], &ext[1], -1);
219
} else {
220
- tcg_out_movext(s, l->type, data_reg,
221
+ tcg_out_movext(s, l->type, l->datalo_reg,
222
TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_EAX);
223
}
224
225
--
226
2.34.1
227
228
diff view generated by jsdifflib