Second try's the charm today, right?

r~


The following changes since commit 00b1faea41d283e931256aa78aa975a369ec3ae6:

  Merge tag 'pull-target-arm-20230123' of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2023-01-23 13:40:28 +0000)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230123

for you to fetch changes up to 709bcd7da3f6b4655d910634a0d520fa1439df38:

  tcg/loongarch64: Reorg goto_tb implementation (2023-01-23 16:00:13 -1000)

----------------------------------------------------------------
common-user: Re-enable ppc32 host
tcg: Avoid recursion in tcg_gen_mulu2_i32
tcg: Mark tcg helpers noinline to avoid an issue with LTO
tcg/arm: Use register pair allocation for qemu_{ld,st}_i64
disas: Enable loongarch disassembler, and fixes
tcg/loongarch64: Improve move immediate
tcg/loongarch64: Improve add immediate
tcg/loongarch64: Improve setcond
tcg/loongarch64: Implement movcond
tcg/loongarch64: Use tcg_pcrel_diff in tcg_out_ldst
tcg/loongarch64: Reorg goto_tb implementation

----------------------------------------------------------------
Richard Henderson (14):
      tcg: Avoid recursion in tcg_gen_mulu2_i32
      tcg/arm: Use register pair allocation for qemu_{ld,st}_i64
      common-user/host/ppc: Implement safe-syscall.inc.S
      linux-user: Implment host/ppc/host-signal.h
      tcg: Mark tcg helpers noinline to avoid an issue with LTO
      target/loongarch: Enable the disassembler for host tcg
      target/loongarch: Disassemble jirl properly
      target/loongarch: Disassemble pcadd* addresses
      tcg/loongarch64: Update tcg-insn-defs.c.inc
      tcg/loongarch64: Introduce tcg_out_addi
      tcg/loongarch64: Improve setcond expansion
      tcg/loongarch64: Implement movcond
      tcg/loongarch64: Use tcg_pcrel_diff in tcg_out_ldst
      tcg/loongarch64: Reorg goto_tb implementation

Rui Wang (1):
      tcg/loongarch64: Optimize immediate loading

 include/exec/helper-proto.h                    |  32 ++-
 include/tcg/tcg.h                              |   7 -
 linux-user/include/host/ppc/host-signal.h      |  39 +++
 tcg/arm/tcg-target-con-set.h                   |   7 +-
 tcg/arm/tcg-target-con-str.h                   |   2 +
 tcg/loongarch64/tcg-target-con-set.h           |   5 +-
 tcg/loongarch64/tcg-target-con-str.h           |   2 +-
 tcg/loongarch64/tcg-target.h                   |  11 +-
 target/loongarch/insns.decode                  |   3 +-
 disas.c                                        |   2 +
 target/loongarch/disas.c                       |  39 ++-
 tcg/tcg-op.c                                   |   4 +-
 target/loongarch/insn_trans/trans_branch.c.inc |   2 +-
 tcg/arm/tcg-target.c.inc                       |  28 +-
 tcg/loongarch64/tcg-insn-defs.c.inc            |  10 +-
 tcg/loongarch64/tcg-target.c.inc               | 364 ++++++++++++++++---------
 common-user/host/ppc/safe-syscall.inc.S        | 107 ++++++++
 target/loongarch/meson.build                   |   3 +-
 18 files changed, 497 insertions(+), 170 deletions(-)
 create mode 100644 linux-user/include/host/ppc/host-signal.h
 create mode 100644 common-user/host/ppc/safe-syscall.inc.S
We have a test for one of TCG_TARGET_HAS_mulu2_i32 or
TCG_TARGET_HAS_muluh_i32 being defined, but the test
became non-functional when we changed to always define
all of these macros.

Replace this with a build-time test in tcg_gen_mulu2_i32.

Fixes: 25c4d9cc845 ("tcg: Always define all of the TCGOpcode enum members.")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1435
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h | 7 -------
 tcg/tcg-op.c      | 4 +++-
 2 files changed, 3 insertions(+), 8 deletions(-)
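
For readers unfamiliar with the qemu_build_not_reached() idiom used below:
it relies on the compiler proving the offending branch dead, roughly like
referencing an undefined symbol from the unreachable arm. A minimal
standalone sketch of the pattern, with illustrative stand-in names rather
than QEMU's actual macros:

    #include <stdint.h>

    /* Stand-ins for the real TCG configuration macros. */
    #define HAS_MULU2  0
    #define HAS_MULUH  0
    #define REG_BITS   64

    /* Deliberately left undefined: any surviving reference breaks the build. */
    extern uint64_t build_time_unreachable(void);

    uint64_t gen_mulu2(uint32_t a, uint32_t b)
    {
        if (HAS_MULU2) {
            return 0;                        /* emit the native mulu2 opcode */
        } else if (HAS_MULUH) {
            return 0;                        /* emit mul + muluh */
        } else if (REG_BITS == 64) {
            return (uint64_t)a * b;          /* widen to 64 bits and multiply */
        } else {
            /* Either eliminated as dead code, or the build fails here. */
            return build_time_unreachable();
        }
    }

This converts the removed #error into a per-function requirement: only a
32-bit backend that really lacks both widening multiplies fails, and it
fails at build time rather than by recursing at code-generation time.
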
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ typedef uint64_t TCGRegSet;
 #define TCG_TARGET_HAS_rem_i64          0
 #endif

-/* For 32-bit targets, some sort of unsigned widening multiply is required. */
-#if TCG_TARGET_REG_BITS == 32 \
-    && !(defined(TCG_TARGET_HAS_mulu2_i32) \
-         || defined(TCG_TARGET_HAS_muluh_i32))
-# error "Missing unsigned widening multiply"
-#endif
-
 #if !defined(TCG_TARGET_HAS_v64) \
     && !defined(TCG_TARGET_HAS_v128) \
     && !defined(TCG_TARGET_HAS_v256)
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
         tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
         tcg_gen_mov_i32(rl, t);
         tcg_temp_free_i32(t);
-    } else {
+    } else if (TCG_TARGET_REG_BITS == 64) {
         TCGv_i64 t0 = tcg_temp_new_i64();
         TCGv_i64 t1 = tcg_temp_new_i64();
         tcg_gen_extu_i32_i64(t0, arg1);
@@ -XXX,XX +XXX,XX @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
         tcg_gen_extr_i64_i32(rl, rh, t0);
         tcg_temp_free_i64(t0);
         tcg_temp_free_i64(t1);
+    } else {
+        qemu_build_not_reached();
     }
 }
--
2.34.1
Although we still can't use ldrd and strd for all operations,
increase the chances by getting the register allocation correct.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/arm/tcg-target-con-set.h |  7 ++++---
 tcg/arm/tcg-target-con-str.h |  2 ++
 tcg/arm/tcg-target.c.inc     | 28 ++++++++++++++++----------
 3 files changed, 24 insertions(+), 13 deletions(-)
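
For reference, constraint register masks map bit i to register i, so the
new 0x5555 masks below ('e' and 'S') admit only even-numbered registers,
which is the pairing LDRD/STRD need for the data registers; the companion
'p' constraint then ties the high half to the following register, which is
exactly the invariant the new tcg_debug_assert()s double-check. A
throwaway sketch of what such a mask selects (not QEMU code):

    #include <stdio.h>

    int main(void)
    {
        unsigned mask = 0x5555;              /* 0b0101010101010101 */

        for (int reg = 0; reg < 16; reg++) {
            if (mask & (1u << reg)) {
                printf("r%d ", reg);         /* prints: r0 r2 r4 ... r14 */
            }
        }
        printf("\n");
        return 0;
    }
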
diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target-con-set.h
+++ b/tcg/arm/tcg-target-con-set.h
@@ -XXX,XX +XXX,XX @@ C_O0_I2(r, rIN)
 C_O0_I2(s, s)
 C_O0_I2(w, r)
 C_O0_I3(s, s, s)
+C_O0_I3(S, p, s)
 C_O0_I4(r, r, rI, rI)
-C_O0_I4(s, s, s, s)
+C_O0_I4(S, p, s, s)
 C_O1_I1(r, l)
 C_O1_I1(r, r)
 C_O1_I1(w, r)
@@ -XXX,XX +XXX,XX @@ C_O1_I2(w, w, wZ)
 C_O1_I3(w, w, w, w)
 C_O1_I4(r, r, r, rI, rI)
 C_O1_I4(r, r, rIN, rIK, 0)
-C_O2_I1(r, r, l)
-C_O2_I2(r, r, l, l)
+C_O2_I1(e, p, l)
+C_O2_I2(e, p, l, l)
 C_O2_I2(r, r, r, r)
 C_O2_I4(r, r, r, r, rIN, rIK)
 C_O2_I4(r, r, rI, rI, rIN, rIK)
diff --git a/tcg/arm/tcg-target-con-str.h b/tcg/arm/tcg-target-con-str.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target-con-str.h
+++ b/tcg/arm/tcg-target-con-str.h
@@ -XXX,XX +XXX,XX @@
  * Define constraint letters for register sets:
  * REGS(letter, register_mask)
  */
+REGS('e', ALL_GENERAL_REGS & 0x5555)  /* even regs */
 REGS('r', ALL_GENERAL_REGS)
 REGS('l', ALL_QLOAD_REGS)
 REGS('s', ALL_QSTORE_REGS)
+REGS('S', ALL_QSTORE_REGS & 0x5555)   /* even qstore */
 REGS('w', ALL_VECTOR_REGS)

 /*
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
         tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
         break;
     case MO_UQ:
+        /* We used pair allocation for datalo, so already should be aligned. */
+        tcg_debug_assert((datalo & 1) == 0);
+        tcg_debug_assert(datahi == datalo + 1);
         /* LDRD requires alignment; double-check that. */
-        if (get_alignment_bits(opc) >= MO_64
-            && (datalo & 1) == 0 && datahi == datalo + 1) {
+        if (get_alignment_bits(opc) >= MO_64) {
             /*
              * Rm (the second address op) must not overlap Rt or Rt + 1.
              * Since datalo is aligned, we can simplify the test via alignment.
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
         tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
         break;
     case MO_UQ:
+        /* We used pair allocation for datalo, so already should be aligned. */
+        tcg_debug_assert((datalo & 1) == 0);
+        tcg_debug_assert(datahi == datalo + 1);
         /* LDRD requires alignment; double-check that. */
-        if (get_alignment_bits(opc) >= MO_64
-            && (datalo & 1) == 0 && datahi == datalo + 1) {
+        if (get_alignment_bits(opc) >= MO_64) {
             tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0);
         } else if (datalo == addrlo) {
             tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4);
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc,
         tcg_out_st32_r(s, cond, datalo, addrlo, addend);
         break;
     case MO_64:
+        /* We used pair allocation for datalo, so already should be aligned. */
+        tcg_debug_assert((datalo & 1) == 0);
+        tcg_debug_assert(datahi == datalo + 1);
         /* STRD requires alignment; double-check that. */
-        if (get_alignment_bits(opc) >= MO_64
-            && (datalo & 1) == 0 && datahi == datalo + 1) {
+        if (get_alignment_bits(opc) >= MO_64) {
             tcg_out_strd_r(s, cond, datalo, addrlo, addend);
         } else if (scratch_addend) {
             tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
         tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
         break;
     case MO_64:
+        /* We used pair allocation for datalo, so already should be aligned. */
+        tcg_debug_assert((datalo & 1) == 0);
+        tcg_debug_assert(datahi == datalo + 1);
         /* STRD requires alignment; double-check that. */
-        if (get_alignment_bits(opc) >= MO_64
-            && (datalo & 1) == 0 && datahi == datalo + 1) {
+        if (get_alignment_bits(opc) >= MO_64) {
             tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
         } else {
             tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_qemu_ld_i32:
         return TARGET_LONG_BITS == 32 ? C_O1_I1(r, l) : C_O1_I2(r, l, l);
     case INDEX_op_qemu_ld_i64:
-        return TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, l) : C_O2_I2(r, r, l, l);
+        return TARGET_LONG_BITS == 32 ? C_O2_I1(e, p, l) : C_O2_I2(e, p, l, l);
     case INDEX_op_qemu_st_i32:
         return TARGET_LONG_BITS == 32 ? C_O0_I2(s, s) : C_O0_I3(s, s, s);
     case INDEX_op_qemu_st_i64:
-        return TARGET_LONG_BITS == 32 ? C_O0_I3(s, s, s) : C_O0_I4(s, s, s, s);
+        return TARGET_LONG_BITS == 32 ? C_O0_I3(S, p, s) : C_O0_I4(S, p, s, s);

     case INDEX_op_st_vec:
         return C_O0_I2(w, r);
--
2.34.1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Message-Id: <20220729172141.1789105-2-richard.henderson@linaro.org>
---
 common-user/host/ppc/safe-syscall.inc.S | 107 ++++++++++++++++++++++++
 1 file changed, 107 insertions(+)
 create mode 100644 common-user/host/ppc/safe-syscall.inc.S
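
As an illustration of the contract described in the comments below, here is
a caller-side sketch; the wrapper and the literal syscall number are
hypothetical, not QEMU code:

    /* First argument: 'int *' to the signal_pending flag; second: the
     * syscall number; the rest: syscall arguments, all passed as 'long'. */
    long safe_syscall_base(int *pending, long nr, ...);

    static int signal_pending;

    long safe_read(int fd, void *buf, long count)
    {
        /* A signal landing inside the guarded window rewinds the PC to
         * safe_syscall_start, so the pending flag is re-checked and the
         * call reports QEMU_ERESTARTSYS instead of racing the handler. */
        return safe_syscall_base(&signal_pending, 3 /* __NR_read on ppc */,
                                 (long)fd, (long)buf, count);
    }
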
diff --git a/common-user/host/ppc/safe-syscall.inc.S b/common-user/host/ppc/safe-syscall.inc.S
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/common-user/host/ppc/safe-syscall.inc.S
@@ -XXX,XX +XXX,XX @@
+/*
+ * safe-syscall.inc.S : host-specific assembly fragment
+ * to handle signals occurring at the same time as system calls.
+ * This is intended to be included by common-user/safe-syscall.S
+ *
+ * Copyright (C) 2022 Linaro, Ltd.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+/*
+ * Standardize on the _CALL_FOO symbols used by GCC:
+ * Apple XCode does not define _CALL_DARWIN.
+ * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV (32-bit).
+ */
+#if !defined(_CALL_SYSV) && \
+    !defined(_CALL_DARWIN) && \
+    !defined(_CALL_AIX) && \
+    !defined(_CALL_ELF)
+# if defined(__APPLE__)
+#  define _CALL_DARWIN
+# elif defined(__ELF__) && TCG_TARGET_REG_BITS == 32
+#  define _CALL_SYSV
+# else
+#  error "Unknown ABI"
+# endif
+#endif
+
+#ifndef _CALL_SYSV
+# error "Unsupported ABI"
+#endif
+
+
+        .global safe_syscall_base
+        .global safe_syscall_start
+        .global safe_syscall_end
+        .type   safe_syscall_base, @function
+
+        .text
+
+        /*
+         * This is the entry point for making a system call. The calling
+         * convention here is that of a C varargs function with the
+         * first argument an 'int *' to the signal_pending flag, the
+         * second one the system call number (as a 'long'), and all further
+         * arguments being syscall arguments (also 'long').
+         */
+safe_syscall_base:
+        .cfi_startproc
+        stwu    1, -8(1)
+        .cfi_def_cfa_offset 8
+        stw     30, 4(1)
+        .cfi_offset 30, -4
+
+        /*
+         * We enter with r3 == &signal_pending
+         *               r4 == syscall number
+         *               r5 ... r10 == syscall arguments
+         *               and return the result in r3
+         * and the syscall instruction needs
+         *               r0 == syscall number
+         *               r3 ... r8 == syscall arguments
+         *               and returns the result in r3
+         * Shuffle everything around appropriately.
+         */
+        mr      30, 3   /* signal_pending */
+        mr      0, 4    /* syscall number */
+        mr      3, 5    /* syscall arguments */
+        mr      4, 6
+        mr      5, 7
+        mr      6, 8
+        mr      7, 9
+        mr      8, 10
+
+        /*
+         * This next sequence of code works in conjunction with the
+         * rewind_if_safe_syscall_function(). If a signal is taken
+         * and the interrupted PC is anywhere between 'safe_syscall_start'
+         * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
+         * The code sequence must therefore be able to cope with this, and
+         * the syscall instruction must be the final one in the sequence.
+         */
+safe_syscall_start:
+        /* if signal_pending is non-zero, don't do the call */
+        lwz     12, 0(30)
+        cmpwi   0, 12, 0
+        bne-    2f
+        sc
+safe_syscall_end:
+        /* code path when we did execute the syscall */
+        lwz     30, 4(1)        /* restore r30 */
+        addi    1, 1, 8         /* restore stack */
+        .cfi_restore 30
+        .cfi_def_cfa_offset 0
+        bnslr+                  /* return on success */
+        b       safe_syscall_set_errno_tail
+
+        /* code path when we didn't execute the syscall */
+2:      lwz     30, 4(1)
+        addi    1, 1, 8
+        addi    3, 0, QEMU_ERESTARTSYS
+        b       safe_syscall_set_errno_tail
+
+        .cfi_endproc
+
+        .size   safe_syscall_base, .-safe_syscall_base
--
2.34.1
This commit re-enables ppc32 as a linux-user host,
as existence of the directory is noted by configure.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1097
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Message-Id: <20220729172141.1789105-3-richard.henderson@linaro.org>
---
 linux-user/include/host/ppc/host-signal.h | 39 +++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 linux-user/include/host/ppc/host-signal.h
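
A sketch of the consumer side: linux-user's common signal handler uses
these hooks roughly as below to implement the safe-syscall rewind (handler
shape simplified, symbol declarations illustrative). host_signal_write()
similarly lets it classify write faults: trap 0x400 is the
instruction-storage interrupt, and DSISR bit 0x02000000 marks the faulting
access as a store.

    #include <signal.h>
    #include <stdint.h>
    /* host_sigcontext, host_signal_pc(), host_signal_set_pc() come from
     * the new header below. */

    extern char safe_syscall_start[], safe_syscall_end[];

    static void host_sig_handler(int sig, siginfo_t *info, void *puc)
    {
        host_sigcontext *uc = puc;
        uintptr_t pc = host_signal_pc(uc);

        /* Interrupted inside the guarded syscall window?  Restart it. */
        if (pc > (uintptr_t)safe_syscall_start &&
            pc < (uintptr_t)safe_syscall_end) {
            host_signal_set_pc(uc, (uintptr_t)safe_syscall_start);
        }
    }
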
diff --git a/linux-user/include/host/ppc/host-signal.h b/linux-user/include/host/ppc/host-signal.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/linux-user/include/host/ppc/host-signal.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * host-signal.h: signal info dependent on the host architecture
+ *
+ * Copyright (c) 2022 Linaro Ltd.
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef PPC_HOST_SIGNAL_H
+#define PPC_HOST_SIGNAL_H
+
+#include <asm/ptrace.h>
+
+/* The third argument to a SA_SIGINFO handler is ucontext_t. */
+typedef ucontext_t host_sigcontext;
+
+static inline uintptr_t host_signal_pc(host_sigcontext *uc)
+{
+    return uc->uc_mcontext.regs->nip;
+}
+
+static inline void host_signal_set_pc(host_sigcontext *uc, uintptr_t pc)
+{
+    uc->uc_mcontext.regs->nip = pc;
+}
+
+static inline void *host_signal_mask(host_sigcontext *uc)
+{
+    return &uc->uc_sigmask;
+}
+
+static inline bool host_signal_write(siginfo_t *info, host_sigcontext *uc)
+{
+    return uc->uc_mcontext.regs->trap != 0x400
+        && (uc->uc_mcontext.regs->dsisr & 0x02000000);
+}
+
+#endif
--
2.34.1
Marking helpers __attribute__((noinline)) prevents an issue
with GCC's ipa-split pass under --enable-lto.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1454
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Idan Horowitz <idan.horowitz@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/helper-proto.h | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)
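
To make the change concrete, here is roughly what one helper prototype
expands to afterwards; the helper chosen is illustrative:

    #include <stdint.h>

    /*
     * DEF_HELPER_FLAGS_2(div_i32, TCG_CALL_NO_RWG_SE, s32, s32, s32)
     * now produces a declaration along the lines of:
     */
    int32_t helper_div_i32(int32_t arg1, int32_t arg2)
        __attribute__((noinline));

Because the attribute sits on a declaration in the same translation unit
as the helper's definition, it applies to the function body as well, which
is what keeps ipa-split from carving out the noreturn exception path.
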
diff --git a/include/exec/helper-proto.h b/include/exec/helper-proto.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/helper-proto.h
+++ b/include/exec/helper-proto.h
@@ -XXX,XX +XXX,XX @@

 #include "exec/helper-head.h"

+/*
+ * Work around an issue with --enable-lto, in which GCC's ipa-split pass
+ * decides to split out the noreturn code paths that raise an exception,
+ * taking the __builtin_return_address() along into the new function,
+ * where it no longer computes a value that returns to TCG generated code.
+ * Despite the name, the noinline attribute affects the splitter, so this
+ * prevents the optimization in question.  Given that helpers should not
+ * otherwise be called directly, this should not have any other visible effect.
+ *
+ * See https://gitlab.com/qemu-project/qemu/-/issues/1454
+ */
+#define DEF_HELPER_ATTR  __attribute__((noinline))
+
 #define DEF_HELPER_FLAGS_0(name, flags, ret) \
-dh_ctype(ret) HELPER(name) (void);
+dh_ctype(ret) HELPER(name) (void) DEF_HELPER_ATTR;

 #define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \
-dh_ctype(ret) HELPER(name) (dh_ctype(t1));
+dh_ctype(ret) HELPER(name) (dh_ctype(t1)) DEF_HELPER_ATTR;

 #define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \
-dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2));
+dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2)) DEF_HELPER_ATTR;

 #define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) \
-dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3));
+dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), \
+                            dh_ctype(t3)) DEF_HELPER_ATTR;

 #define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) \
 dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
-                            dh_ctype(t4));
+                            dh_ctype(t4)) DEF_HELPER_ATTR;

 #define DEF_HELPER_FLAGS_5(name, flags, ret, t1, t2, t3, t4, t5) \
 dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
-                            dh_ctype(t4), dh_ctype(t5));
+                            dh_ctype(t4), dh_ctype(t5)) DEF_HELPER_ATTR;

 #define DEF_HELPER_FLAGS_6(name, flags, ret, t1, t2, t3, t4, t5, t6) \
 dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
-                            dh_ctype(t4), dh_ctype(t5), dh_ctype(t6));
+                            dh_ctype(t4), dh_ctype(t5), \
+                            dh_ctype(t6)) DEF_HELPER_ATTR;

 #define DEF_HELPER_FLAGS_7(name, flags, ret, t1, t2, t3, t4, t5, t6, t7) \
 dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
                             dh_ctype(t4), dh_ctype(t5), dh_ctype(t6), \
-                            dh_ctype(t7));
+                            dh_ctype(t7)) DEF_HELPER_ATTR;

 #define IN_HELPER_PROTO

@@ -XXX,XX +XXX,XX @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
 #undef DEF_HELPER_FLAGS_5
 #undef DEF_HELPER_FLAGS_6
 #undef DEF_HELPER_FLAGS_7
+#undef DEF_HELPER_ATTR

 #endif /* HELPER_PROTO_H */
--
2.34.1
diff view generated by jsdifflib
Reuse the decodetree based disassembler from
target/loongarch/ for tcg/loongarch64/.

The generation of decode-insns.c.inc into ./libcommon.fa.p/ could
eventually result in conflict, if any other host requires the same
trick, but this is good enough for now.

Reviewed-by: WANG Xuerui <git@xen0n.name>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 disas.c                      | 2 ++
 target/loongarch/meson.build | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/disas.c b/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/disas.c
+++ b/disas.c
@@ -XXX,XX +XXX,XX @@ static void initialize_debug_host(CPUDebug *s)
     s->info.cap_insn_split = 6;
 #elif defined(__hppa__)
     s->info.print_insn = print_insn_hppa;
+#elif defined(__loongarch__)
+    s->info.print_insn = print_insn_loongarch;
 #endif
 }
 
diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/meson.build
+++ b/target/loongarch/meson.build
@@ -XXX,XX +XXX,XX @@ gen = decodetree.process('insns.decode')
 loongarch_ss = ss.source_set()
 loongarch_ss.add(files(
   'cpu.c',
-  'disas.c',
 ))
 loongarch_tcg_ss = ss.source_set()
 loongarch_tcg_ss.add(gen)
@@ -XXX,XX +XXX,XX @@ loongarch_softmmu_ss.add(files(
   'iocsr_helper.c',
 ))
 
+common_ss.add(when: 'CONFIG_LOONGARCH_DIS', if_true: [files('disas.c'), gen])
+
 loongarch_ss.add_all(when: 'CONFIG_TCG', if_true: [loongarch_tcg_ss])
 
 target_arch += {'loongarch': loongarch_ss}
--
2.34.1

While jirl shares the same instruction format as bne etc,
it is not assembled the same. In particular, rd is printed
first not second and the immediate is not pc-relative.

Decode into the arg_rr_i structure, which prints correctly.
This changes the "offs" member to "imm", so update translate
to match.

Reviewed-by: WANG Xuerui <git@xen0n.name>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/loongarch/insns.decode                  | 3 ++-
 target/loongarch/disas.c                       | 2 +-
 target/loongarch/insn_trans/trans_branch.c.inc | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/target/loongarch/insns.decode b/target/loongarch/insns.decode
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insns.decode
+++ b/target/loongarch/insns.decode
@@ -XXX,XX +XXX,XX @@
 @rr_ui12     .... ...... imm:12 rj:5 rd:5    &rr_i
 @rr_i14s2    .... .... .............. rj:5 rd:5    &rr_i imm=%i14s2
 @rr_i16      .... .. imm:s16 rj:5 rd:5    &rr_i
+@rr_i16s2    .... .. ................ rj:5 rd:5    &rr_i imm=%offs16
 @hint_r_i12  .... ...... imm:s12 rj:5 hint:5    &hint_r_i
 @rrr_sa2p1   .... ........ ... .. rk:5 rj:5 rd:5    &rrr_sa sa=%sa2p1
 @rrr_sa2     .... ........ ... sa:2 rk:5 rj:5 rd:5    &rrr_sa
@@ -XXX,XX +XXX,XX @@ beqz         0100 00 ................ ..... .....    @r_offs21
 bnez         0100 01 ................ ..... .....    @r_offs21
 bceqz        0100 10 ................ 00 ... .....   @c_offs21
 bcnez        0100 10 ................ 01 ... .....   @c_offs21
-jirl         0100 11 ................ ..... .....    @rr_offs16
+jirl         0100 11 ................ ..... .....    @rr_i16s2
 b            0101 00 ..........................      @offs26
 bl           0101 01 ..........................      @offs26
 beq          0101 10 ................ ..... .....    @rr_offs16
diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN(beqz,         r_offs)
 INSN(bnez,         r_offs)
 INSN(bceqz,        c_offs)
 INSN(bcnez,        c_offs)
-INSN(jirl,         rr_offs)
+INSN(jirl,         rr_i)
 INSN(b,            offs)
 INSN(bl,           offs)
 INSN(beq,          rr_offs)
diff --git a/target/loongarch/insn_trans/trans_branch.c.inc b/target/loongarch/insn_trans/trans_branch.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/insn_trans/trans_branch.c.inc
+++ b/target/loongarch/insn_trans/trans_branch.c.inc
@@ -XXX,XX +XXX,XX @@ static bool trans_jirl(DisasContext *ctx, arg_jirl *a)
     TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
     TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
 
-    tcg_gen_addi_tl(cpu_pc, src1, a->offs);
+    tcg_gen_addi_tl(cpu_pc, src1, a->imm);
     tcg_gen_movi_tl(dest, ctx->base.pc_next + 4);
     gen_set_gpr(a->rd, dest, EXT_NONE);
     tcg_gen_lookup_and_goto_ptr();
--
2.34.1

Print both the raw field and the resolved pc-relative
address, as we do for branches.

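For reference, each pcadd* form resolves to a simple function of pc
and the immediate. A minimal standalone sketch, with made-up pc/imm
values (the real helpers are in the diff below):

    #include <stdio.h>
    #include <stdint.h>

    /* pcalau12i: add imm << 12 to pc, then clear the low 12 bits. */
    static uint64_t pcalau12i_target(uint64_t pc, int32_t imm)
    {
        return (pc + ((int64_t)imm << 12)) & ~(uint64_t)0xfff;
    }

    int main(void)
    {
        /* e.g. pc = 0x120004567, imm = 1 resolves to 0x120005000 */
        printf("0x%llx\n",
               (unsigned long long)pcalau12i_target(0x120004567, 1));
        return 0;
    }
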
Reviewed-by: WANG Xuerui <git@xen0n.name>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/loongarch/disas.c | 37 +++++++++++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/target/loongarch/disas.c b/target/loongarch/disas.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/disas.c
+++ b/target/loongarch/disas.c
@@ -XXX,XX +XXX,XX @@ INSN(fsel,         fffc)
 INSN(addu16i_d,    rr_i)
 INSN(lu12i_w,      r_i)
 INSN(lu32i_d,      r_i)
-INSN(pcaddi,       r_i)
-INSN(pcalau12i,    r_i)
-INSN(pcaddu12i,    r_i)
-INSN(pcaddu18i,    r_i)
 INSN(ll_w,         rr_i)
 INSN(sc_w,         rr_i)
 INSN(ll_d,         rr_i)
@@ -XXX,XX +XXX,XX @@ static bool trans_fcmp_cond_##suffix(DisasContext *ctx, \
 
 FCMP_INSN(s)
 FCMP_INSN(d)
+
+#define PCADD_INSN(name)                                        \
+static bool trans_##name(DisasContext *ctx, arg_##name *a)      \
+{                                                               \
+    output(ctx, #name, "r%d, %d # 0x%" PRIx64,                  \
+           a->rd, a->imm, gen_##name(ctx->pc, a->imm));         \
+    return true;                                                \
+}
+
+static uint64_t gen_pcaddi(uint64_t pc, int imm)
+{
+    return pc + (imm << 2);
+}
+
+static uint64_t gen_pcalau12i(uint64_t pc, int imm)
+{
+    return (pc + (imm << 12)) & ~0xfff;
+}
+
+static uint64_t gen_pcaddu12i(uint64_t pc, int imm)
+{
+    return pc + (imm << 12);
+}
+
+static uint64_t gen_pcaddu18i(uint64_t pc, int imm)
+{
+    return pc + ((uint64_t)(imm) << 18);
+}
+
+PCADD_INSN(pcaddi)
+PCADD_INSN(pcalau12i)
+PCADD_INSN(pcaddu12i)
+PCADD_INSN(pcaddu18i)
--
2.34.1

From: Rui Wang <wangrui@loongson.cn>

diff:
  Imm                 Before                  After
  0000000000000000    addi.w  rd, zero, 0     addi.w  rd, zero, 0
                      lu52i.d rd, zero, 0
  00000000fffff800    lu12i.w rd, -1          addi.w  rd, zero, -2048
                      ori     rd, rd, 2048    lu32i.d rd, 0
                      lu32i.d rd, 0

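A standalone sketch of the field checks behind the new logic, using
the second example from the table above (sextreg here is a local
stand-in mimicking QEMU's helper of the same name):

    #include <stdint.h>
    #include <assert.h>

    /* Signed extract of val[pos + len - 1 : pos]. */
    static int64_t sextreg(uint64_t val, int pos, int len)
    {
        return (int64_t)(val << (64 - pos - len)) >> (64 - len);
    }

    int main(void)
    {
        int64_t val = 0xfffff800;
        int64_t lo = sextreg(val, 0, 12);    /* -2048 */
        int64_t hi12 = sextreg(val, 12, 20); /* -1 */

        /* hi12 matching the sign-extension of lo means a single
         * addi.w already reproduces the low 32 bits, so only
         * lu32i.d rd, 0 is needed on top. */
        assert(lo == -2048);
        assert(hi12 == sextreg(lo, 12, 20));
        return 0;
    }
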
Reviewed-by: WANG Xuerui <git@xen0n.name>
Signed-off-by: Rui Wang <wangrui@loongson.cn>
Message-Id: <20221107144713.845550-1-wangrui@loongson.cn>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target.c.inc | 35 +++++++++++---------------------
 1 file changed, 12 insertions(+), 23 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
     return true;
 }
 
-static bool imm_part_needs_loading(bool high_bits_are_ones,
-                                   tcg_target_long part)
-{
-    if (high_bits_are_ones) {
-        return part != -1;
-    } else {
-        return part != 0;
-    }
-}
-
 /* Loads a 32-bit immediate into rd, sign-extended. */
 static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val)
 {
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val)
     tcg_target_long hi12 = sextreg(val, 12, 20);
 
     /* Single-instruction cases. */
-    if (lo == val) {
-        /* val fits in simm12: addi.w rd, zero, val */
-        tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val);
-        return;
-    }
-    if (0x800 <= val && val <= 0xfff) {
+    if (hi12 == 0) {
         /* val fits in uimm12: ori rd, zero, val */
         tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val);
         return;
     }
+    if (hi12 == sextreg(lo, 12, 20)) {
+        /* val fits in simm12: addi.w rd, zero, val */
+        tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val);
+        return;
+    }
 
     /* High bits must be set; load with lu12i.w + optional ori. */
     tcg_out_opc_lu12i_w(s, rd, hi12);
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
 
     intptr_t pc_offset;
     tcg_target_long val_lo, val_hi, pc_hi, offset_hi;
-    tcg_target_long hi32, hi52;
-    bool rd_high_bits_are_ones;
+    tcg_target_long hi12, hi32, hi52;
 
     /* Value fits in signed i32. */
     if (type == TCG_TYPE_I32 || val == (int32_t)val) {
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
         return;
     }
 
+    hi12 = sextreg(val, 12, 20);
     hi32 = sextreg(val, 32, 20);
     hi52 = sextreg(val, 52, 12);
 
     /* Single cu52i.d case. */
-    if (ctz64(val) >= 52) {
+    if ((hi52 != 0) && (ctz64(val) >= 52)) {
         tcg_out_opc_cu52i_d(s, rd, TCG_REG_ZERO, hi52);
         return;
     }
 
     /* Slow path.  Initialize the low 32 bits, then concat high bits. */
     tcg_out_movi_i32(s, rd, val);
-    rd_high_bits_are_ones = (int32_t)val < 0;
 
-    if (imm_part_needs_loading(rd_high_bits_are_ones, hi32)) {
+    /* Load hi32 and hi52 explicitly when they are unexpected values. */
+    if (hi32 != sextreg(hi12, 20, 20)) {
         tcg_out_opc_cu32i_d(s, rd, hi32);
-        rd_high_bits_are_ones = hi32 < 0;
     }
 
-    if (imm_part_needs_loading(rd_high_bits_are_ones, hi52)) {
+    if (hi52 != sextreg(hi32, 20, 12)) {
         tcg_out_opc_cu52i_d(s, rd, rd, hi52);
     }
 }
--
2.34.1

Regenerate with ADDU16I included:

    $ cd loongarch-opcodes/scripts/go
    $ go run ./genqemutcgdefs > $QEMU/tcg/loongarch64/tcg-insn-defs.c.inc

Reviewed-by: WANG Xuerui <git@xen0n.name>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-insn-defs.c.inc | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tcg/loongarch64/tcg-insn-defs.c.inc b/tcg/loongarch64/tcg-insn-defs.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-insn-defs.c.inc
+++ b/tcg/loongarch64/tcg-insn-defs.c.inc
@@ -XXX,XX +XXX,XX @@
  *
  * This file is auto-generated by genqemutcgdefs from
  * https://github.com/loongson-community/loongarch-opcodes,
- * from commit 961f0c60f5b63e574d785995600c71ad5413fdc4.
+ * from commit 25ca7effe9d88101c1cf96c4005423643386d81f.
  * DO NOT EDIT.
  */
 
@@ -XXX,XX +XXX,XX @@ typedef enum {
     OPC_ANDI = 0x03400000,
     OPC_ORI = 0x03800000,
     OPC_XORI = 0x03c00000,
+    OPC_ADDU16I_D = 0x10000000,
     OPC_LU12I_W = 0x14000000,
     OPC_CU32I_D = 0x16000000,
     OPC_PCADDU2I = 0x18000000,
@@ -XXX,XX +XXX,XX @@ tcg_out_opc_xori(TCGContext *s, TCGReg d, TCGReg j, uint32_t uk12)
     tcg_out32(s, encode_djuk12_insn(OPC_XORI, d, j, uk12));
 }
 
+/* Emits the `addu16i.d d, j, sk16` instruction. */
+static void __attribute__((unused))
+tcg_out_opc_addu16i_d(TCGContext *s, TCGReg d, TCGReg j, int32_t sk16)
+{
+    tcg_out32(s, encode_djsk16_insn(OPC_ADDU16I_D, d, j, sk16));
+}
+
 /* Emits the `lu12i.w d, sj20` instruction. */
 static void __attribute__((unused))
 tcg_out_opc_lu12i_w(TCGContext *s, TCGReg d, int32_t sj20)
--
2.34.1

From: Claudio Fontana <cfontana@suse.de>

after the initial split into 3 tcg variants, we proceed to also
split tcg_start_vcpu_thread.

We actually split it in 2 this time, since the icount variant
just uses the round robin function.

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Claudio Fontana <cfontana@suse.de>
Message-Id: <20201015143217.29337-3-cfontana@suse.de>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-cpus-mttcg.h  | 21 --------------
 accel/tcg/tcg-cpus-rr.h     |  3 +-
 accel/tcg/tcg-cpus.h        |  1 -
 accel/tcg/tcg-all.c         |  5 ++++
 accel/tcg/tcg-cpus-icount.c |  2 +-
 accel/tcg/tcg-cpus-mttcg.c  | 29 +++++++++++++++++--
 accel/tcg/tcg-cpus-rr.c     | 39 +++++++++++++++++++++++--
 accel/tcg/tcg-cpus.c        | 58 -------------------------------------
 8 files changed, 71 insertions(+), 87 deletions(-)
 delete mode 100644 accel/tcg/tcg-cpus-mttcg.h

diff --git a/accel/tcg/tcg-cpus-mttcg.h b/accel/tcg/tcg-cpus-mttcg.h
deleted file mode 100644
index XXXXXXX..XXXXXXX
--- a/accel/tcg/tcg-cpus-mttcg.h
+++ /dev/null
@@ -XXX,XX +XXX,XX @@
-/*
- * QEMU TCG Multi Threaded vCPUs implementation
- *
- * Copyright 2020 SUSE LLC
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#ifndef TCG_CPUS_MTTCG_H
-#define TCG_CPUS_MTTCG_H
-
-/*
- * In the multi-threaded case each vCPU has its own thread. The TLS
- * variable current_cpu can be used deep in the code to find the
- * current CPUState for a given thread.
- */
-
-void *tcg_cpu_thread_fn(void *arg);
-
-#endif /* TCG_CPUS_MTTCG_H */
diff --git a/accel/tcg/tcg-cpus-rr.h b/accel/tcg/tcg-cpus-rr.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-rr.h
+++ b/accel/tcg/tcg-cpus-rr.h
@@ -XXX,XX +XXX,XX @@
 /* Kick all RR vCPUs. */
 void qemu_cpu_kick_rr_cpus(CPUState *unused);
 
-void *tcg_rr_cpu_thread_fn(void *arg);
+/* start the round robin vcpu thread */
+void rr_start_vcpu_thread(CPUState *cpu);
 
 #endif /* TCG_CPUS_RR_H */
diff --git a/accel/tcg/tcg-cpus.h b/accel/tcg/tcg-cpus.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.h
+++ b/accel/tcg/tcg-cpus.h
@@ -XXX,XX +XXX,XX @@ extern const CpusAccel tcg_cpus_mttcg;
 extern const CpusAccel tcg_cpus_icount;
 extern const CpusAccel tcg_cpus_rr;
 
-void tcg_start_vcpu_thread(CPUState *cpu);
 void qemu_tcg_destroy_vcpu(CPUState *cpu);
 int tcg_cpu_exec(CPUState *cpu);
 void tcg_handle_interrupt(CPUState *cpu, int mask);
diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -XXX,XX +XXX,XX @@ static int tcg_init(MachineState *ms)
     tcg_exec_init(s->tb_size * 1024 * 1024);
     mttcg_enabled = s->mttcg_enabled;
 
+    /*
+     * Initialize TCG regions
+     */
+    tcg_region_init();
+
     if (mttcg_enabled) {
         cpus_register_accel(&tcg_cpus_mttcg);
     } else if (icount_enabled()) {
diff --git a/accel/tcg/tcg-cpus-icount.c b/accel/tcg/tcg-cpus-icount.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-icount.c
+++ b/accel/tcg/tcg-cpus-icount.c
@@ -XXX,XX +XXX,XX @@ static void icount_handle_interrupt(CPUState *cpu, int mask)
 }
 
 const CpusAccel tcg_cpus_icount = {
-    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .create_vcpu_thread = rr_start_vcpu_thread,
     .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
 
     .handle_interrupt = icount_handle_interrupt,
diff --git a/accel/tcg/tcg-cpus-mttcg.c b/accel/tcg/tcg-cpus-mttcg.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-mttcg.c
+++ b/accel/tcg/tcg-cpus-mttcg.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/boards.h"
 
 #include "tcg-cpus.h"
-#include "tcg-cpus-mttcg.h"
 
 /*
  * In the multi-threaded case each vCPU has its own thread. The TLS
@@ -XXX,XX +XXX,XX @@
  * current CPUState for a given thread.
  */
 
-void *tcg_cpu_thread_fn(void *arg)
+static void *tcg_cpu_thread_fn(void *arg)
 {
     CPUState *cpu = arg;
 
@@ -XXX,XX +XXX,XX @@ static void mttcg_kick_vcpu_thread(CPUState *cpu)
     cpu_exit(cpu);
 }
 
+static void mttcg_start_vcpu_thread(CPUState *cpu)
+{
+    char thread_name[VCPU_THREAD_NAME_SIZE];
+
+    g_assert(tcg_enabled());
+
+    parallel_cpus = (current_machine->smp.max_cpus > 1);
+
+    cpu->thread = g_malloc0(sizeof(QemuThread));
+    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
+    qemu_cond_init(cpu->halt_cond);
+
+    /* create a thread per vCPU with TCG (MTTCG) */
+    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
+             cpu->cpu_index);
+
+    qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
+                       cpu, QEMU_THREAD_JOINABLE);
+
+#ifdef _WIN32
+    cpu->hThread = qemu_thread_get_handle(cpu->thread);
+#endif
+}
+
 const CpusAccel tcg_cpus_mttcg = {
-    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .create_vcpu_thread = mttcg_start_vcpu_thread,
     .kick_vcpu_thread = mttcg_kick_vcpu_thread,
 
     .handle_interrupt = tcg_handle_interrupt,
diff --git a/accel/tcg/tcg-cpus-rr.c b/accel/tcg/tcg-cpus-rr.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-rr.c
+++ b/accel/tcg/tcg-cpus-rr.c
@@ -XXX,XX +XXX,XX @@ static void deal_with_unplugged_cpus(void)
  * elsewhere.
  */
 
-void *tcg_rr_cpu_thread_fn(void *arg)
+static void *tcg_rr_cpu_thread_fn(void *arg)
 {
     CPUState *cpu = arg;
 
@@ -XXX,XX +XXX,XX @@ void *tcg_rr_cpu_thread_fn(void *arg)
     return NULL;
 }
 
+void rr_start_vcpu_thread(CPUState *cpu)
+{
+    char thread_name[VCPU_THREAD_NAME_SIZE];
+    static QemuCond *single_tcg_halt_cond;
+    static QemuThread *single_tcg_cpu_thread;
+
+    g_assert(tcg_enabled());
+    parallel_cpus = false;
+
+    if (!single_tcg_cpu_thread) {
+        cpu->thread = g_malloc0(sizeof(QemuThread));
+        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
+        qemu_cond_init(cpu->halt_cond);
+
+        /* share a single thread for all cpus with TCG */
+        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
+        qemu_thread_create(cpu->thread, thread_name,
+                           tcg_rr_cpu_thread_fn,
+                           cpu, QEMU_THREAD_JOINABLE);
+
+        single_tcg_halt_cond = cpu->halt_cond;
+        single_tcg_cpu_thread = cpu->thread;
+#ifdef _WIN32
+        cpu->hThread = qemu_thread_get_handle(cpu->thread);
+#endif
+    } else {
+        /* we share the thread */
+        cpu->thread = single_tcg_cpu_thread;
+        cpu->halt_cond = single_tcg_halt_cond;
+        cpu->thread_id = first_cpu->thread_id;
+        cpu->can_do_io = 1;
+        cpu->created = true;
+    }
+}
+
 const CpusAccel tcg_cpus_rr = {
-    .create_vcpu_thread = tcg_start_vcpu_thread,
+    .create_vcpu_thread = rr_start_vcpu_thread,
     .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
 
     .handle_interrupt = tcg_handle_interrupt,
diff --git a/accel/tcg/tcg-cpus.c b/accel/tcg/tcg-cpus.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.c
+++ b/accel/tcg/tcg-cpus.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/boards.h"
 
 #include "tcg-cpus.h"
-#include "tcg-cpus-mttcg.h"
-#include "tcg-cpus-rr.h"
 
 /* common functionality among all TCG variants */
 
-void tcg_start_vcpu_thread(CPUState *cpu)
-{
-    char thread_name[VCPU_THREAD_NAME_SIZE];
-    static QemuCond *single_tcg_halt_cond;
-    static QemuThread *single_tcg_cpu_thread;
-    static int tcg_region_inited;
-
-    assert(tcg_enabled());
-    /*
-     * Initialize TCG regions--once. Now is a good time, because:
-     * (1) TCG's init context, prologue and target globals have been set up.
-     * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
-     *     -accel flag is processed, so the check doesn't work then).
-     */
-    if (!tcg_region_inited) {
-        tcg_region_inited = 1;
-        tcg_region_init();
-        parallel_cpus = qemu_tcg_mttcg_enabled() && current_machine->smp.max_cpus > 1;
-    }
-
-    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
-        cpu->thread = g_malloc0(sizeof(QemuThread));
-        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
-        qemu_cond_init(cpu->halt_cond);
-
-        if (qemu_tcg_mttcg_enabled()) {
-            /* create a thread per vCPU with TCG (MTTCG) */
-            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
-                     cpu->cpu_index);
-
-            qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
-                               cpu, QEMU_THREAD_JOINABLE);
-
-        } else {
-            /* share a single thread for all cpus with TCG */
-            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
-            qemu_thread_create(cpu->thread, thread_name,
-                               tcg_rr_cpu_thread_fn,
-                               cpu, QEMU_THREAD_JOINABLE);
-
-            single_tcg_halt_cond = cpu->halt_cond;
-            single_tcg_cpu_thread = cpu->thread;
-        }
-#ifdef _WIN32
-        cpu->hThread = qemu_thread_get_handle(cpu->thread);
-#endif
-    } else {
-        /* For non-MTTCG cases we share the thread */
-        cpu->thread = single_tcg_cpu_thread;
-        cpu->halt_cond = single_tcg_halt_cond;
-        cpu->thread_id = first_cpu->thread_id;
-        cpu->can_do_io = 1;
-        cpu->created = true;
-    }
-}
-
 void qemu_tcg_destroy_vcpu(CPUState *cpu)
 {
     cpu_thread_signal_destroyed(cpu);
--
2.25.1

Adjust the constraints to allow any int32_t for immediate
addition.  Split immediate adds into addu16i + addi, which
covers quite a lot of the immediate space.  For the hole in
the middle, load the constant into TMP0 instead.

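A minimal standalone sketch of the decomposition tried first, with
illustrative immediates (sextreg again mimics QEMU's helper; the
second value lands in the hole described in the diff below and falls
back to the constant-load path):

    #include <stdio.h>
    #include <stdint.h>

    static int64_t sextreg(uint64_t val, int pos, int len)
    {
        return (int64_t)(val << (64 - pos - len)) >> (64 - len);
    }

    int main(void)
    {
        int64_t imms[] = { 0x12340678, 0x12345678 };

        for (int i = 0; i < 2; i++) {
            int64_t imm = imms[i];
            int64_t lo12 = sextreg(imm, 0, 12);
            int64_t hi16 = sextreg(imm - lo12, 16, 16);

            if (imm == (hi16 << 16) + lo12) {
                printf("%#lx: addu16i.d + addi\n", (long)imm);
            } else {
                /* bits 12..15 are set: load into TMP0 and add */
                printf("%#lx: movi + add\n", (long)imm);
            }
        }
        return 0;
    }
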
Reviewed-by: WANG Xuerui <git@xen0n.name>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target-con-set.h |  4 +-
 tcg/loongarch64/tcg-target-con-str.h |  2 +-
 tcg/loongarch64/tcg-target.c.inc     | 57 ++++++++++++++++++++++++----
 3 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -XXX,XX +XXX,XX @@ C_O1_I1(r, L)
 C_O1_I2(r, r, rC)
 C_O1_I2(r, r, ri)
 C_O1_I2(r, r, rI)
+C_O1_I2(r, r, rJ)
 C_O1_I2(r, r, rU)
 C_O1_I2(r, r, rW)
 C_O1_I2(r, r, rZ)
 C_O1_I2(r, 0, rZ)
-C_O1_I2(r, rZ, rN)
+C_O1_I2(r, rZ, ri)
+C_O1_I2(r, rZ, rJ)
 C_O1_I2(r, rZ, rZ)
diff --git a/tcg/loongarch64/tcg-target-con-str.h b/tcg/loongarch64/tcg-target-con-str.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target-con-str.h
+++ b/tcg/loongarch64/tcg-target-con-str.h
@@ -XXX,XX +XXX,XX @@ REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
 * CONST(letter, TCG_CT_CONST_* bit set)
 */
 CONST('I', TCG_CT_CONST_S12)
-CONST('N', TCG_CT_CONST_N12)
+CONST('J', TCG_CT_CONST_S32)
 CONST('U', TCG_CT_CONST_U12)
 CONST('Z', TCG_CT_CONST_ZERO)
 CONST('C', TCG_CT_CONST_C12)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_oarg_regs[] = {
 
 #define TCG_CT_CONST_ZERO  0x100
 #define TCG_CT_CONST_S12   0x200
-#define TCG_CT_CONST_N12   0x400
+#define TCG_CT_CONST_S32   0x400
 #define TCG_CT_CONST_U12   0x800
 #define TCG_CT_CONST_C12   0x1000
 #define TCG_CT_CONST_WSZ   0x2000
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
     if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) {
         return true;
     }
-    if ((ct & TCG_CT_CONST_N12) && -val == sextreg(-val, 0, 12)) {
+    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
         return true;
     }
     if ((ct & TCG_CT_CONST_U12) && val >= 0 && val <= 0xfff) {
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
     }
 }
 
+static void tcg_out_addi(TCGContext *s, TCGType type, TCGReg rd,
+                         TCGReg rs, tcg_target_long imm)
+{
+    tcg_target_long lo12 = sextreg(imm, 0, 12);
+    tcg_target_long hi16 = sextreg(imm - lo12, 16, 16);
+
+    /*
+     * Note that there's a hole in between hi16 and lo12:
+     *
+     *       3                   2                   1                   0
+     *     1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+     * ...+-------------------------------+-------+-----------------------+
+     *    |             hi16              |       |          lo12         |
+     * ...+-------------------------------+-------+-----------------------+
+     *
+     * For bits within that hole, it's more efficient to use LU12I and ADD.
+     */
+    if (imm == (hi16 << 16) + lo12) {
+        if (hi16) {
+            tcg_out_opc_addu16i_d(s, rd, rs, hi16);
+            rs = rd;
+        }
+        if (type == TCG_TYPE_I32) {
+            tcg_out_opc_addi_w(s, rd, rs, lo12);
+        } else if (lo12) {
+            tcg_out_opc_addi_d(s, rd, rs, lo12);
+        } else {
+            tcg_out_mov(s, type, rd, rs);
+        }
+    } else {
+        tcg_out_movi(s, type, TCG_REG_TMP0, imm);
+        if (type == TCG_TYPE_I32) {
+            tcg_out_opc_add_w(s, rd, rs, TCG_REG_TMP0);
+        } else {
+            tcg_out_opc_add_d(s, rd, rs, TCG_REG_TMP0);
+        }
+    }
+}
+
 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg)
 {
     tcg_out_opc_andi(s, ret, arg, 0xff);
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     case INDEX_op_add_i32:
         if (c2) {
-            tcg_out_opc_addi_w(s, a0, a1, a2);
+            tcg_out_addi(s, TCG_TYPE_I32, a0, a1, a2);
         } else {
             tcg_out_opc_add_w(s, a0, a1, a2);
         }
         break;
     case INDEX_op_add_i64:
         if (c2) {
-            tcg_out_opc_addi_d(s, a0, a1, a2);
+            tcg_out_addi(s, TCG_TYPE_I64, a0, a1, a2);
         } else {
             tcg_out_opc_add_d(s, a0, a1, a2);
         }
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     case INDEX_op_sub_i32:
         if (c2) {
-            tcg_out_opc_addi_w(s, a0, a1, -a2);
+            tcg_out_addi(s, TCG_TYPE_I32, a0, a1, -a2);
         } else {
             tcg_out_opc_sub_w(s, a0, a1, a2);
         }
         break;
     case INDEX_op_sub_i64:
         if (c2) {
-            tcg_out_opc_addi_d(s, a0, a1, -a2);
+            tcg_out_addi(s, TCG_TYPE_I64, a0, a1, -a2);
         } else {
             tcg_out_opc_sub_d(s, a0, a1, a2);
         }
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
         return C_O1_I2(r, r, ri);
 
     case INDEX_op_add_i32:
+        return C_O1_I2(r, r, ri);
     case INDEX_op_add_i64:
-        return C_O1_I2(r, r, rI);
+        return C_O1_I2(r, r, rJ);
 
     case INDEX_op_and_i32:
     case INDEX_op_and_i64:
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
         return C_O1_I2(r, 0, rZ);
 
     case INDEX_op_sub_i32:
+        return C_O1_I2(r, rZ, ri);
     case INDEX_op_sub_i64:
-        return C_O1_I2(r, rZ, rN);
+        return C_O1_I2(r, rZ, rJ);
 
     case INDEX_op_mul_i32:
     case INDEX_op_mul_i64:
--
2.34.1

Split out a helper function, tcg_out_setcond_int, which
does not always produce the complete boolean result, but
returns a set of flags to do so.

Accept all int32_t as constant input, so that LE/GT can
adjust the constant to LT.

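The flag packing is compact enough to show in a standalone sketch
(constants mirror the patch; the register number and printed
mnemonics are illustrative):

    #include <stdio.h>

    #define NB_REGS        32
    #define SETCOND_INV    NB_REGS              /* invert the boolean */
    #define SETCOND_NEZ    (SETCOND_INV << 1)   /* only zero/non-zero */
    #define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)

    int main(void)
    {
        /* e.g. EQ: compute a zero/non-zero value in r5, then invert */
        int tmpflags = 5 | SETCOND_NEZ | SETCOND_INV;
        int reg = tmpflags & ~SETCOND_FLAGS;

        switch (tmpflags & SETCOND_FLAGS) {
        case 0:
            printf("r%d already holds the boolean\n", reg);
            break;
        case SETCOND_INV:
            printf("xori  ret, r%d, 1\n", reg);
            break;
        case SETCOND_NEZ:
            printf("sltu  ret, zero, r%d\n", reg);
            break;
        case SETCOND_NEZ | SETCOND_INV:
            printf("sltui ret, r%d, 1\n", reg);
            break;
        }
        return 0;
    }
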
Reviewed-by: WANG Xuerui <git@xen0n.name>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target.c.inc | 165 +++++++++++++++++++++----------
 1 file changed, 115 insertions(+), 50 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc,
     tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0);
 }
 
-static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
-                            TCGReg arg1, TCGReg arg2, bool c2)
-{
-    TCGReg tmp;
+#define SETCOND_INV    TCG_TARGET_NB_REGS
+#define SETCOND_NEZ    (SETCOND_INV << 1)
+#define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)
 
-    if (c2) {
-        tcg_debug_assert(arg2 == 0);
+static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
+                               TCGReg arg1, tcg_target_long arg2, bool c2)
+{
+    int flags = 0;
+
+    switch (cond) {
+    case TCG_COND_EQ:    /* -> NE  */
+    case TCG_COND_GE:    /* -> LT  */
+    case TCG_COND_GEU:   /* -> LTU */
+    case TCG_COND_GT:    /* -> LE  */
+    case TCG_COND_GTU:   /* -> LEU */
+        cond = tcg_invert_cond(cond);
+        flags ^= SETCOND_INV;
+        break;
+    default:
+        break;
     }
 
     switch (cond) {
-    case TCG_COND_EQ:
-        if (c2) {
-            tmp = arg1;
-        } else {
-            tcg_out_opc_sub_d(s, ret, arg1, arg2);
-            tmp = ret;
-        }
-        tcg_out_opc_sltui(s, ret, tmp, 1);
-        break;
-    case TCG_COND_NE:
-        if (c2) {
-            tmp = arg1;
-        } else {
-            tcg_out_opc_sub_d(s, ret, arg1, arg2);
-            tmp = ret;
-        }
-        tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp);
-        break;
-    case TCG_COND_LT:
-        tcg_out_opc_slt(s, ret, arg1, arg2);
-        break;
-    case TCG_COND_GE:
-        tcg_out_opc_slt(s, ret, arg1, arg2);
-        tcg_out_opc_xori(s, ret, ret, 1);
-        break;
     case TCG_COND_LE:
-        tcg_out_setcond(s, TCG_COND_GE, ret, arg2, arg1, false);
-        break;
-    case TCG_COND_GT:
-        tcg_out_setcond(s, TCG_COND_LT, ret, arg2, arg1, false);
-        break;
-    case TCG_COND_LTU:
-        tcg_out_opc_sltu(s, ret, arg1, arg2);
-        break;
-    case TCG_COND_GEU:
-        tcg_out_opc_sltu(s, ret, arg1, arg2);
-        tcg_out_opc_xori(s, ret, ret, 1);
-        break;
     case TCG_COND_LEU:
-        tcg_out_setcond(s, TCG_COND_GEU, ret, arg2, arg1, false);
+        /*
+         * If we have a constant input, the most efficient way to implement
+         * LE is by adding 1 and using LT.  Watch out for wrap around for LEU.
+         * We don't need to care for this for LE because the constant input
+         * is still constrained to int32_t, and INT32_MAX+1 is representable
+         * in the 64-bit temporary register.
+         */
+        if (c2) {
+            if (cond == TCG_COND_LEU) {
+                /* unsigned <= -1 is true */
+                if (arg2 == -1) {
+                    tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV));
+                    return ret;
+                }
+                cond = TCG_COND_LTU;
+            } else {
+                cond = TCG_COND_LT;
+            }
+            arg2 += 1;
+        } else {
+            TCGReg tmp = arg2;
+            arg2 = arg1;
+            arg1 = tmp;
+            cond = tcg_swap_cond(cond);    /* LE -> GE */
+            cond = tcg_invert_cond(cond);  /* GE -> LT */
+            flags ^= SETCOND_INV;
+        }
         break;
-    case TCG_COND_GTU:
-        tcg_out_setcond(s, TCG_COND_LTU, ret, arg2, arg1, false);
+    default:
         break;
+    }
+
+    switch (cond) {
+    case TCG_COND_NE:
+        flags |= SETCOND_NEZ;
+        if (!c2) {
+            tcg_out_opc_xor(s, ret, arg1, arg2);
+        } else if (arg2 == 0) {
+            ret = arg1;
+        } else if (arg2 >= 0 && arg2 <= 0xfff) {
+            tcg_out_opc_xori(s, ret, arg1, arg2);
+        } else {
+            tcg_out_addi(s, TCG_TYPE_REG, ret, arg1, -arg2);
+        }
+        break;
+
+    case TCG_COND_LT:
+    case TCG_COND_LTU:
+        if (c2) {
+            if (arg2 >= -0x800 && arg2 <= 0x7ff) {
+                if (cond == TCG_COND_LT) {
+                    tcg_out_opc_slti(s, ret, arg1, arg2);
+                } else {
+                    tcg_out_opc_sltui(s, ret, arg1, arg2);
+                }
+                break;
+            }
+            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
+            arg2 = TCG_REG_TMP0;
+        }
+        if (cond == TCG_COND_LT) {
+            tcg_out_opc_slt(s, ret, arg1, arg2);
+        } else {
+            tcg_out_opc_sltu(s, ret, arg1, arg2);
+        }
+        break;
+
     default:
         g_assert_not_reached();
         break;
     }
+
+    return ret | flags;
+}
+
+static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
+                            TCGReg arg1, tcg_target_long arg2, bool c2)
+{
+    int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);
+
+    if (tmpflags != ret) {
+        TCGReg tmp = tmpflags & ~SETCOND_FLAGS;
+
+        switch (tmpflags & SETCOND_FLAGS) {
+        case SETCOND_INV:
+            /* Intermediate result is boolean: simply invert. */
+            tcg_out_opc_xori(s, ret, tmp, 1);
+            break;
+        case SETCOND_NEZ:
+            /* Intermediate result is zero/non-zero: test != 0. */
+            tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp);
+            break;
+        case SETCOND_NEZ | SETCOND_INV:
+            /* Intermediate result is zero/non-zero: test == 0. */
+            tcg_out_opc_sltui(s, ret, tmp, 1);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    }
 }
 
 /*
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_ctz_i64:
         return C_O1_I2(r, r, rW);
 
-    case INDEX_op_setcond_i32:
-    case INDEX_op_setcond_i64:
-        return C_O1_I2(r, r, rZ);
-
     case INDEX_op_deposit_i32:
     case INDEX_op_deposit_i64:
         /* Must deposit into the same register as input */
         return C_O1_I2(r, 0, rZ);
 
     case INDEX_op_sub_i32:
+    case INDEX_op_setcond_i32:
         return C_O1_I2(r, rZ, ri);
     case INDEX_op_sub_i64:
+    case INDEX_op_setcond_i64:
         return C_O1_I2(r, rZ, rJ);
 
     case INDEX_op_mul_i32:
     case INDEX_op_mul_i64:
--
2.34.1

Reviewed-by: WANG Xuerui <git@xen0n.name>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target-con-set.h |  1 +
 tcg/loongarch64/tcg-target.h         |  4 ++--
 tcg/loongarch64/tcg-target.c.inc     | 33 ++++++++++++++++++++++++++++
 3 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -XXX,XX +XXX,XX @@ C_O1_I2(r, 0, rZ)
 C_O1_I2(r, rZ, ri)
 C_O1_I2(r, rZ, rJ)
 C_O1_I2(r, rZ, rZ)
+C_O1_I4(r, rZ, rJ, rZ, rZ)
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@ typedef enum {
 #define TCG_TARGET_CALL_ARG_I64      TCG_CALL_ARG_NORMAL
 
 /* optional instructions */
-#define TCG_TARGET_HAS_movcond_i32      0
+#define TCG_TARGET_HAS_movcond_i32      1
 #define TCG_TARGET_HAS_div_i32          1
 #define TCG_TARGET_HAS_rem_i32          1
 #define TCG_TARGET_HAS_div2_i32         0
@@ -XXX,XX +XXX,XX @@ typedef enum {
 #define TCG_TARGET_HAS_qemu_st8_i32     0
 
 /* 64-bit operations */
-#define TCG_TARGET_HAS_movcond_i64      0
+#define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_div_i64          1
 #define TCG_TARGET_HAS_rem_i64          1
 #define TCG_TARGET_HAS_div2_i64         0
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
     }
 }
 
+static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
+                            TCGReg c1, tcg_target_long c2, bool const2,
+                            TCGReg v1, TCGReg v2)
+{
+    int tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, c1, c2, const2);
+    TCGReg t;
+
+    /* Standardize the test below to t != 0. */
+    if (tmpflags & SETCOND_INV) {
+        t = v1, v1 = v2, v2 = t;
+    }
+
+    t = tmpflags & ~SETCOND_FLAGS;
+    if (v1 == TCG_REG_ZERO) {
+        tcg_out_opc_masknez(s, ret, v2, t);
+    } else if (v2 == TCG_REG_ZERO) {
+        tcg_out_opc_maskeqz(s, ret, v1, t);
+    } else {
+        tcg_out_opc_masknez(s, TCG_REG_TMP2, v2, t); /* t ? 0 : v2 */
+        tcg_out_opc_maskeqz(s, TCG_REG_TMP1, v1, t); /* t ? v1 : 0 */
+        tcg_out_opc_or(s, ret, TCG_REG_TMP1, TCG_REG_TMP2);
+    }
+}
+
 /*
  * Branch helpers
  */
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_setcond(s, args[3], a0, a1, a2, c2);
         break;
 
+    case INDEX_op_movcond_i32:
+    case INDEX_op_movcond_i64:
+        tcg_out_movcond(s, args[5], a0, a1, a2, c2, args[3], args[4]);
+        break;
+
     case INDEX_op_ld8s_i32:
     case INDEX_op_ld8s_i64:
         tcg_out_ldst(s, OPC_LD_B, a0, a1, a2);
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_remu_i64:
         return C_O1_I2(r, rZ, rZ);
 
+    case INDEX_op_movcond_i32:
+    case INDEX_op_movcond_i64:
+        return C_O1_I4(r, rZ, rJ, rZ, rZ);
+
     default:
         g_assert_not_reached();
     }
--
2.34.1

Take the w^x split into account when computing the
pc-relative distance to an absolute pointer.

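A standalone sketch of why this matters, with made-up buffer
addresses: under a split mapping the same byte of code is visible at
a writable and an executable address, and only a displacement
computed against the executable alias is correct at run time:

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    int main(void)
    {
        uintptr_t buf_rw = 0x200000000000;    /* writable alias */
        uintptr_t buf_rx = 0x300000000000;    /* executable alias */
        uintptr_t code_ptr = buf_rw + 0x1234; /* where we emit */
        uintptr_t target = buf_rx + 0x8000;   /* absolute pointer */

        /* wrong: distance measured from the rw alias */
        ptrdiff_t bad = (ptrdiff_t)(target - code_ptr);
        /* right: translate code_ptr to its rx alias first */
        ptrdiff_t good = (ptrdiff_t)(target - (buf_rx + (code_ptr - buf_rw)));

        printf("bad=%#tx good=%#tx\n", bad, good);
        return 0;
    }
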
Reviewed-by: WANG Xuerui <git@xen0n.name>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target.c.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data,
     intptr_t imm12 = sextreg(offset, 0, 12);
 
     if (offset != imm12) {
-        intptr_t diff = offset - (uintptr_t)s->code_ptr;
+        intptr_t diff = tcg_pcrel_diff(s, (void *)offset);
 
         if (addr == TCG_REG_ZERO && diff == (int32_t)diff) {
             imm12 = sextreg(diff, 0, 12);
--
2.34.1

From: Claudio Fontana <cfontana@suse.de>

Signed-off-by: Claudio Fontana <cfontana@suse.de>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20201015143217.29337-4-cfontana@suse.de>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-cpus-icount.h |  6 +--
 accel/tcg/tcg-cpus-rr.h     |  2 +-
 accel/tcg/tcg-cpus.h        |  6 +--
 accel/tcg/tcg-cpus-icount.c | 24 ++++++------
 accel/tcg/tcg-cpus-mttcg.c  | 10 ++---
 accel/tcg/tcg-cpus-rr.c     | 74 ++++++++++++++++++-------------------
 accel/tcg/tcg-cpus.c        |  6 +--
 7 files changed, 64 insertions(+), 64 deletions(-)

diff --git a/accel/tcg/tcg-cpus-icount.h b/accel/tcg/tcg-cpus-icount.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-icount.h
+++ b/accel/tcg/tcg-cpus-icount.h
@@ -XXX,XX +XXX,XX @@
 #ifndef TCG_CPUS_ICOUNT_H
 #define TCG_CPUS_ICOUNT_H
 
-void handle_icount_deadline(void);
-void prepare_icount_for_run(CPUState *cpu);
-void process_icount_data(CPUState *cpu);
+void icount_handle_deadline(void);
+void icount_prepare_for_run(CPUState *cpu);
+void icount_process_data(CPUState *cpu);
 
 #endif /* TCG_CPUS_ICOUNT_H */
diff --git a/accel/tcg/tcg-cpus-rr.h b/accel/tcg/tcg-cpus-rr.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-rr.h
+++ b/accel/tcg/tcg-cpus-rr.h
@@ -XXX,XX +XXX,XX @@
 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
 
 /* Kick all RR vCPUs. */
-void qemu_cpu_kick_rr_cpus(CPUState *unused);
+void rr_kick_vcpu_thread(CPUState *unused);
 
 /* start the round robin vcpu thread */
 void rr_start_vcpu_thread(CPUState *cpu);
diff --git a/accel/tcg/tcg-cpus.h b/accel/tcg/tcg-cpus.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.h
+++ b/accel/tcg/tcg-cpus.h
@@ -XXX,XX +XXX,XX @@ extern const CpusAccel tcg_cpus_mttcg;
 extern const CpusAccel tcg_cpus_icount;
 extern const CpusAccel tcg_cpus_rr;
 
-void qemu_tcg_destroy_vcpu(CPUState *cpu);
-int tcg_cpu_exec(CPUState *cpu);
-void tcg_handle_interrupt(CPUState *cpu, int mask);
+void tcg_cpus_destroy(CPUState *cpu);
+int tcg_cpus_exec(CPUState *cpu);
+void tcg_cpus_handle_interrupt(CPUState *cpu, int mask);
 
 #endif /* TCG_CPUS_H */
diff --git a/accel/tcg/tcg-cpus-icount.c b/accel/tcg/tcg-cpus-icount.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-icount.c
+++ b/accel/tcg/tcg-cpus-icount.c
@@ -XXX,XX +XXX,XX @@
 #include "tcg-cpus-icount.h"
 #include "tcg-cpus-rr.h"
 
-static int64_t tcg_get_icount_limit(void)
+static int64_t icount_get_limit(void)
 {
     int64_t deadline;
 
@@ -XXX,XX +XXX,XX @@ static int64_t tcg_get_icount_limit(void)
     }
 }
 
-static void notify_aio_contexts(void)
+static void icount_notify_aio_contexts(void)
 {
     /* Wake up other AioContexts. */
     qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
     qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
 }
 
-void handle_icount_deadline(void)
+void icount_handle_deadline(void)
 {
     assert(qemu_in_vcpu_thread());
     int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                                   QEMU_TIMER_ATTR_ALL);
 
     if (deadline == 0) {
-        notify_aio_contexts();
+        icount_notify_aio_contexts();
     }
 }
 
-void prepare_icount_for_run(CPUState *cpu)
+void icount_prepare_for_run(CPUState *cpu)
 {
     int insns_left;
 
     /*
-     * These should always be cleared by process_icount_data after
+     * These should always be cleared by icount_process_data after
      * each vCPU execution. However u16.high can be raised
-     * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
+     * asynchronously by cpu_exit/cpu_interrupt/tcg_cpus_handle_interrupt
      */
     g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
     g_assert(cpu->icount_extra == 0);
 
-    cpu->icount_budget = tcg_get_icount_limit();
+    cpu->icount_budget = icount_get_limit();
     insns_left = MIN(0xffff, cpu->icount_budget);
     cpu_neg(cpu)->icount_decr.u16.low = insns_left;
     cpu->icount_extra = cpu->icount_budget - insns_left;
@@ -XXX,XX +XXX,XX @@ void prepare_icount_for_run(CPUState *cpu)
     replay_mutex_lock();
 
     if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
-        notify_aio_contexts();
+        icount_notify_aio_contexts();
     }
 }
 
-void process_icount_data(CPUState *cpu)
+void icount_process_data(CPUState *cpu)
 {
     /* Account for executed instructions */
     icount_update(cpu);
@@ -XXX,XX +XXX,XX @@ static void icount_handle_interrupt(CPUState *cpu, int mask)
 {
     int old_mask = cpu->interrupt_request;
 
-    tcg_handle_interrupt(cpu, mask);
+    tcg_cpus_handle_interrupt(cpu, mask);
     if (qemu_cpu_is_self(cpu) &&
         !cpu->can_do_io
         && (mask & ~old_mask) != 0) {
@@ -XXX,XX +XXX,XX @@ static void icount_handle_interrupt(CPUState *cpu, int mask)
 
 const CpusAccel tcg_cpus_icount = {
     .create_vcpu_thread = rr_start_vcpu_thread,
-    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
+    .kick_vcpu_thread = rr_kick_vcpu_thread,
 
     .handle_interrupt = icount_handle_interrupt,
     .get_virtual_clock = icount_get,
diff --git a/accel/tcg/tcg-cpus-mttcg.c b/accel/tcg/tcg-cpus-mttcg.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-mttcg.c
+++ b/accel/tcg/tcg-cpus-mttcg.c
@@ -XXX,XX +XXX,XX @@
  * current CPUState for a given thread.
  */
 
-static void *tcg_cpu_thread_fn(void *arg)
+static void *mttcg_cpu_thread_fn(void *arg)
 {
     CPUState *cpu = arg;
 
@@ -XXX,XX +XXX,XX @@ static void *tcg_cpu_thread_fn(void *arg)
         if (cpu_can_run(cpu)) {
             int r;
             qemu_mutex_unlock_iothread();
-            r = tcg_cpu_exec(cpu);
+            r = tcg_cpus_exec(cpu);
             qemu_mutex_lock_iothread();
             switch (r) {
             case EXCP_DEBUG:
@@ -XXX,XX +XXX,XX @@ static void *tcg_cpu_thread_fn(void *arg)
         qemu_wait_io_event(cpu);
     } while (!cpu->unplug || cpu_can_run(cpu));
 
-    qemu_tcg_destroy_vcpu(cpu);
+    tcg_cpus_destroy(cpu);
     qemu_mutex_unlock_iothread();
     rcu_unregister_thread();
     return NULL;
@@ -XXX,XX +XXX,XX @@ static void mttcg_start_vcpu_thread(CPUState *cpu)
     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
              cpu->cpu_index);
 
-    qemu_thread_create(cpu->thread, thread_name, tcg_cpu_thread_fn,
+    qemu_thread_create(cpu->thread, thread_name, mttcg_cpu_thread_fn,
                        cpu, QEMU_THREAD_JOINABLE);
 
 #ifdef _WIN32
@@ -XXX,XX +XXX,XX @@ const CpusAccel tcg_cpus_mttcg = {
     .create_vcpu_thread = mttcg_start_vcpu_thread,
     .kick_vcpu_thread = mttcg_kick_vcpu_thread,
 
-    .handle_interrupt = tcg_handle_interrupt,
+    .handle_interrupt = tcg_cpus_handle_interrupt,
 };
diff --git a/accel/tcg/tcg-cpus-rr.c b/accel/tcg/tcg-cpus-rr.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus-rr.c
+++ b/accel/tcg/tcg-cpus-rr.c
@@ -XXX,XX +XXX,XX @@
 #include "tcg-cpus-icount.h"
 
 /* Kick all RR vCPUs */
-void qemu_cpu_kick_rr_cpus(CPUState *unused)
+void rr_kick_vcpu_thread(CPUState *unused)
 {
     CPUState *cpu;
 
@@ -XXX,XX +XXX,XX @@ void qemu_cpu_kick_rr_cpus(CPUState *unused)
  * idleness is complete.
  */
 
-static QEMUTimer *tcg_kick_vcpu_timer;
-static CPUState *tcg_current_rr_cpu;
+static QEMUTimer *rr_kick_vcpu_timer;
+static CPUState *rr_current_cpu;
 
 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
 
-static inline int64_t qemu_tcg_next_kick(void)
+static inline int64_t rr_next_kick_time(void)
 {
     return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
 }
 
 /* Kick the currently round-robin scheduled vCPU to next */
-static void qemu_cpu_kick_rr_next_cpu(void)
+static void rr_kick_next_cpu(void)
 {
     CPUState *cpu;
     do {
-        cpu = qatomic_mb_read(&tcg_current_rr_cpu);
+        cpu = qatomic_mb_read(&rr_current_cpu);
         if (cpu) {
             cpu_exit(cpu);
         }
-    } while (cpu != qatomic_mb_read(&tcg_current_rr_cpu));
+    } while (cpu != qatomic_mb_read(&rr_current_cpu));
 }
 
-static void kick_tcg_thread(void *opaque)
+static void rr_kick_thread(void *opaque)
 {
-    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
-    qemu_cpu_kick_rr_next_cpu();
+    timer_mod(rr_kick_vcpu_timer, rr_next_kick_time());
+    rr_kick_next_cpu();
 }
 
-static void start_tcg_kick_timer(void)
+static void rr_start_kick_timer(void)
 {
-    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
-        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
-                                           kick_tcg_thread, NULL);
+    if (!rr_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
+        rr_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                          rr_kick_thread, NULL);
     }
-    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
-        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
+    if (rr_kick_vcpu_timer && !timer_pending(rr_kick_vcpu_timer)) {
+        timer_mod(rr_kick_vcpu_timer, rr_next_kick_time());
     }
 }
 
-static void stop_tcg_kick_timer(void)
+static void rr_stop_kick_timer(void)
 {
-    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
-        timer_del(tcg_kick_vcpu_timer);
+    if (rr_kick_vcpu_timer && timer_pending(rr_kick_vcpu_timer)) {
+        timer_del(rr_kick_vcpu_timer);
     }
 }
 
-static void qemu_tcg_rr_wait_io_event(void)
+static void rr_wait_io_event(void)
 {
     CPUState *cpu;
 
     while (all_cpu_threads_idle()) {
-        stop_tcg_kick_timer();
+        rr_stop_kick_timer();
         qemu_cond_wait_iothread(first_cpu->halt_cond);
     }
 
-    start_tcg_kick_timer();
+    rr_start_kick_timer();
 
     CPU_FOREACH(cpu) {
         qemu_wait_io_event_common(cpu);
@@ -XXX,XX +XXX,XX @@ static void qemu_tcg_rr_wait_io_event(void)
  * Destroy any remaining vCPUs which have been unplugged and have
  * finished running
  */
-static void deal_with_unplugged_cpus(void)
+static void rr_deal_with_unplugged_cpus(void)
 {
     CPUState *cpu;
 
     CPU_FOREACH(cpu) {
         if (cpu->unplug && !cpu_can_run(cpu)) {
-            qemu_tcg_destroy_vcpu(cpu);
+            tcg_cpus_destroy(cpu);
             break;
         }
     }
@@ -XXX,XX +XXX,XX @@ static void deal_with_unplugged_cpus(void)
  * elsewhere.
  */
 
-static void *tcg_rr_cpu_thread_fn(void *arg)
+static void *rr_cpu_thread_fn(void *arg)
 {
     CPUState *cpu = arg;
 
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
         }
     }
 
-    start_tcg_kick_timer();
+    rr_start_kick_timer();
 
     cpu = first_cpu;
 
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
              * Run the timers here. This is much more efficient than
              * waking up the I/O thread and waiting for completion.
              */
-            handle_icount_deadline();
+            icount_handle_deadline();
         }
 
         replay_mutex_unlock();
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
 
         while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
 
-            qatomic_mb_set(&tcg_current_rr_cpu, cpu);
+            qatomic_mb_set(&rr_current_cpu, cpu);
             current_cpu = cpu;
 
             qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
 
                 qemu_mutex_unlock_iothread();
                 if (icount_enabled()) {
-                    prepare_icount_for_run(cpu);
+                    icount_prepare_for_run(cpu);
                 }
-                r = tcg_cpu_exec(cpu);
+                r = tcg_cpus_exec(cpu);
                 if (icount_enabled()) {
-                    process_icount_data(cpu);
+                    icount_process_data(cpu);
                 }
                 qemu_mutex_lock_iothread();
 
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
         } /* while (cpu && !cpu->exit_request).. */
 
         /* Does not need qatomic_mb_set because a spurious wakeup is okay. */
-        qatomic_set(&tcg_current_rr_cpu, NULL);
+        qatomic_set(&rr_current_cpu, NULL);
 
         if (cpu && cpu->exit_request) {
             qatomic_mb_set(&cpu->exit_request, 0);
@@ -XXX,XX +XXX,XX @@ static void *tcg_rr_cpu_thread_fn(void *arg)
             qemu_notify_event();
         }
 
-        qemu_tcg_rr_wait_io_event();
-        deal_with_unplugged_cpus();
+        rr_wait_io_event();
+        rr_deal_with_unplugged_cpus();
     }
 
     rcu_unregister_thread();
@@ -XXX,XX +XXX,XX @@ void rr_start_vcpu_thread(CPUState *cpu)
         /* share a single thread for all cpus with TCG */
         snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
         qemu_thread_create(cpu->thread, thread_name,
-                           tcg_rr_cpu_thread_fn,
+                           rr_cpu_thread_fn,
                            cpu, QEMU_THREAD_JOINABLE);
 
         single_tcg_halt_cond = cpu->halt_cond;
@@ -XXX,XX +XXX,XX @@ void rr_start_vcpu_thread(CPUState *cpu)
 
 const CpusAccel tcg_cpus_rr = {
     .create_vcpu_thread = rr_start_vcpu_thread,
-    .kick_vcpu_thread = qemu_cpu_kick_rr_cpus,
+    .kick_vcpu_thread = rr_kick_vcpu_thread,
 
-    .handle_interrupt = tcg_handle_interrupt,
+    .handle_interrupt = tcg_cpus_handle_interrupt,
 };
diff --git a/accel/tcg/tcg-cpus.c b/accel/tcg/tcg-cpus.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tcg-cpus.c
+++ b/accel/tcg/tcg-cpus.c
@@ -XXX,XX +XXX,XX @@
 
 /* common functionality among all TCG variants */
 
-void qemu_tcg_destroy_vcpu(CPUState *cpu)
+void tcg_cpus_destroy(CPUState *cpu)
 {
     cpu_thread_signal_destroyed(cpu);
 }
 
-int tcg_cpu_exec(CPUState *cpu)
+int tcg_cpus_exec(CPUState *cpu)
 {
     int ret;
 #ifdef CONFIG_PROFILER
@@ -XXX,XX +XXX,XX @@ int tcg_cpu_exec(CPUState *cpu)
 }
 
 /* mask must never be zero, except for A20 change call */
-void tcg_handle_interrupt(CPUState *cpu, int mask)
+void tcg_cpus_handle_interrupt(CPUState *cpu, int mask)
 {
     g_assert(qemu_mutex_iothread_locked());
 
--
2.25.1

The old implementation replaces two insns, swapping between

    b         <dest>
    nop
and
    pcaddu18i tmp, <dest>
    jirl      zero, tmp, <dest> & 0xffff

There is a race condition in which a thread could be stopped at
the jirl, i.e. with the top of the address loaded, and when
restarted we have re-linked to a different TB, so that the top
half no longer matches the bottom half.

Note that while we never directly re-link to a different TB, we
can link, unlink, and link again all while the stopped thread
remains stopped.

The new implementation replaces only one insn, swapping between

    b         <dest>
and
    pcadd     tmp, <jmp_addr>

falling through to load the address from tmp, and branch.

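A standalone sketch of the direct-branch range test used when
re-patching (addresses are made up; sextreg mimics QEMU's helper):

    #include <stdint.h>
    #include <stdio.h>

    static int64_t sextreg(uint64_t val, int pos, int len)
    {
        return (int64_t)(val << (64 - pos - len)) >> (64 - len);
    }

    int main(void)
    {
        uint64_t jmp_rx = 0x7f0000001000;  /* insn being patched */
        uint64_t d_addr = 0x7f0000805000;  /* new branch target */
        int64_t d_disp = (int64_t)(d_addr - jmp_rx) >> 2;

        if (d_disp == sextreg(d_disp, 0, 26)) {
            /* one atomic 4-byte store of a direct "b" suffices */
            printf("direct b, offset %ld insns\n", (long)d_disp);
        } else {
            /* patch a single pcadd and take the indirect slot load */
            printf("indirect via jmp_target_addr slot\n");
        }
        return 0;
    }
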
Reviewed-by: WANG Xuerui <git@xen0n.name>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target.h     |  7 +---
 tcg/loongarch64/tcg-target.c.inc | 72 ++++++++++++++------------------
 2 files changed, 33 insertions(+), 46 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -XXX,XX +XXX,XX @@
 
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define TCG_TARGET_NB_REGS 32
-/*
- * PCADDU18I + JIRL sequence can give 20 + 16 + 2 = 38 bits
- * signed offset, which is +/- 128 GiB.
- */
-#define MAX_CODE_GEN_BUFFER_SIZE  (128 * GiB)
+
+#define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
 
 typedef enum {
     TCG_REG_ZERO,
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
 #endif
 }
 
-/* LoongArch uses `andi zero, zero, 0` as NOP. */
-#define NOP OPC_ANDI
-static void tcg_out_nop(TCGContext *s)
-{
-    tcg_out32(s, NOP);
-}
-
-void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
-                              uintptr_t jmp_rx, uintptr_t jmp_rw)
-{
-    tcg_insn_unit i1, i2;
-    ptrdiff_t upper, lower;
-    uintptr_t addr = tb->jmp_target_addr[n];
-    ptrdiff_t offset = (ptrdiff_t)(addr - jmp_rx) >> 2;
-
-    if (offset == sextreg(offset, 0, 26)) {
-        i1 = encode_sd10k16_insn(OPC_B, offset);
-        i2 = NOP;
-    } else {
-        tcg_debug_assert(offset == sextreg(offset, 0, 36));
-        lower = (int16_t)offset;
-        upper = (offset - lower) >> 16;
-
-        i1 = encode_dsj20_insn(OPC_PCADDU18I, TCG_REG_TMP0, upper);
-        i2 = encode_djsk16_insn(OPC_JIRL, TCG_REG_ZERO, TCG_REG_TMP0, lower);
-    }
-    uint64_t pair = ((uint64_t)i2 << 32) | i1;
-    qatomic_set((uint64_t *)jmp_rw, pair);
-    flush_idcache_range(jmp_rx, jmp_rw, 8);
-}
-
 /*
  * Entry-points
  */
@@ -XXX,XX +XXX,XX @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
 static void tcg_out_goto_tb(TCGContext *s, int which)
 {
     /*
-     * Ensure that patch area is 8-byte aligned so that an
-     * atomic write can be used to patch the target address.
+     * Direct branch, or load indirect address, to be patched
+     * by tb_target_set_jmp_target.  Check indirect load offset
+     * in range early, regardless of direct branch distance,
+     * via assert within tcg_out_opc_pcaddu2i.
      */
-    if ((uintptr_t)s->code_ptr & 7) {
-        tcg_out_nop(s);
-    }
+    uintptr_t i_addr = get_jmp_target_addr(s, which);
+    intptr_t i_disp = tcg_pcrel_diff(s, (void *)i_addr);
+
     set_jmp_insn_offset(s, which);
-    /*
-     * actual branch destination will be patched by
-     * tb_target_set_jmp_target later
-     */
-    tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0);
+    tcg_out_opc_pcaddu2i(s, TCG_REG_TMP0, i_disp >> 2);
+
+    /* Finish the load and indirect branch. */
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_TMP0, 0);
     tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
     set_jmp_reset_offset(s, which);
 }
 
+void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
+                              uintptr_t jmp_rx, uintptr_t jmp_rw)
+{
+    uintptr_t d_addr = tb->jmp_target_addr[n];
+    ptrdiff_t d_disp = (ptrdiff_t)(d_addr - jmp_rx) >> 2;
+    tcg_insn_unit insn;
+
+    /* Either directly branch, or load slot address for indirect branch. */
+    if (d_disp == sextreg(d_disp, 0, 26)) {
+        insn = encode_sd10k16_insn(OPC_B, d_disp);
+    } else {
+        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
+        intptr_t i_disp = i_addr - jmp_rx;
+        insn = encode_dsj20_insn(OPC_PCADDU2I, TCG_REG_TMP0, i_disp >> 2);
+    }
+
+    qatomic_set((tcg_insn_unit *)jmp_rw, insn);
+    flush_idcache_range(jmp_rx, jmp_rw, 4);
+}
+
 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                        const TCGArg args[TCG_MAX_OP_ARGS],
                        const int const_args[TCG_MAX_OP_ARGS])
--
2.34.1