TCG patch queue, plus one target/sh4 patch that
Yoshinori Sato asked me to process.

r~

The following changes since commit efbf38d73e5dcc4d5f8b98c6e7a12be1f3b91745:

  Merge tag 'for-upstream' of git://repo.or.cz/qemu/kevin into staging (2022-10-03 15:06:07 -0400)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20221004

for you to fetch changes up to ab419fd8a035a65942de4e63effcd55ccbf1a9fe:

  target/sh4: Fix TB_FLAG_UNALIGN (2022-10-04 12:33:05 -0700)

----------------------------------------------------------------
Cache CPUClass for use in hot code paths.
Add CPUTLBEntryFull, probe_access_full, tlb_set_page_full.
Add generic support for TARGET_TB_PCREL.
tcg/ppc: Optimize 26-bit jumps using STQ for POWER 2.07
target/sh4: Fix TB_FLAG_UNALIGN

----------------------------------------------------------------
Alex Bennée (3):
      cpu: cache CPUClass in CPUState for hot code paths
      hw/core/cpu-sysemu: used cached class in cpu_asidx_from_attrs
      cputlb: used cached CPUClass in our hot-paths

Leandro Lupori (1):
      tcg/ppc: Optimize 26-bit jumps

Richard Henderson (16):
      accel/tcg: Rename CPUIOTLBEntry to CPUTLBEntryFull
      accel/tcg: Drop addr member from SavedIOTLB
      accel/tcg: Suppress auto-invalidate in probe_access_internal
      accel/tcg: Introduce probe_access_full
      accel/tcg: Introduce tlb_set_page_full
      include/exec: Introduce TARGET_PAGE_ENTRY_EXTRA
      accel/tcg: Remove PageDesc code_bitmap
      accel/tcg: Use bool for page_find_alloc
      accel/tcg: Use DisasContextBase in plugin_gen_tb_start
      accel/tcg: Do not align tb->page_addr[0]
      accel/tcg: Inline tb_flush_jmp_cache
      include/hw/core: Create struct CPUJumpCache
      hw/core: Add CPUClass.get_pc
      accel/tcg: Introduce tb_pc and log_pc
      accel/tcg: Introduce TARGET_TB_PCREL
      target/sh4: Fix TB_FLAG_UNALIGN

 accel/tcg/internal.h | 10 ++
 accel/tcg/tb-hash.h | 1 +
 accel/tcg/tb-jmp-cache.h | 65 ++++++++
 include/exec/cpu-common.h | 1 +
 include/exec/cpu-defs.h | 48 ++++--
 include/exec/exec-all.h | 75 ++++++++-
 include/exec/plugin-gen.h | 7 +-
 include/hw/core/cpu.h | 28 ++--
 include/qemu/typedefs.h | 2 +
 include/tcg/tcg.h | 2 +-
 target/sh4/cpu.h | 56 ++++---
 accel/stubs/tcg-stub.c | 4 +
 accel/tcg/cpu-exec.c | 80 +++++-----
 accel/tcg/cputlb.c | 259 ++++++++++++++++++--------------
 accel/tcg/plugin-gen.c | 22 +--
 accel/tcg/translate-all.c | 214 ++++++++++++--------------
 accel/tcg/translator.c | 2 +-
 cpu.c | 9 +-
 hw/core/cpu-common.c | 3 +-
 hw/core/cpu-sysemu.c | 5 +-
 linux-user/sh4/signal.c | 6 +-
 plugins/core.c | 2 +-
 target/alpha/cpu.c | 9 ++
 target/arm/cpu.c | 17 ++-
 target/arm/mte_helper.c | 14 +-
 target/arm/sve_helper.c | 4 +-
 target/arm/translate-a64.c | 2 +-
 target/avr/cpu.c | 10 +-
 target/cris/cpu.c | 8 +
 target/hexagon/cpu.c | 10 +-
 target/hppa/cpu.c | 12 +-
 target/i386/cpu.c | 9 ++
 target/i386/tcg/tcg-cpu.c | 2 +-
 target/loongarch/cpu.c | 11 +-
 target/m68k/cpu.c | 8 +
 target/microblaze/cpu.c | 10 +-
 target/mips/cpu.c | 8 +
 target/mips/tcg/exception.c | 2 +-
 target/mips/tcg/sysemu/special_helper.c | 2 +-
 target/nios2/cpu.c | 9 ++
 target/openrisc/cpu.c | 10 +-
 target/ppc/cpu_init.c | 8 +
 target/riscv/cpu.c | 17 ++-
 target/rx/cpu.c | 10 +-
 target/s390x/cpu.c | 8 +
 target/s390x/tcg/mem_helper.c | 4 -
 target/sh4/cpu.c | 18 ++-
 target/sh4/helper.c | 6 +-
 target/sh4/translate.c | 90 +++++------
 target/sparc/cpu.c | 10 +-
 target/tricore/cpu.c | 11 +-
 target/xtensa/cpu.c | 8 +
 tcg/tcg.c | 8 +-
 trace/control-target.c | 2 +-
 tcg/ppc/tcg-target.c.inc | 119 +++++++++++----
 55 files changed, 915 insertions(+), 462 deletions(-)
 create mode 100644 accel/tcg/tb-jmp-cache.h
From: Alex Bennée <alex.bennee@linaro.org>

The class cast checkers are quite expensive and always on (unlike the
dynamic case, whose checks are gated by CONFIG_QOM_CAST_DEBUG). To
avoid the overhead of repeatedly checking something which should never
change, we cache the CPUClass reference for use in hot code paths.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220811151413.3350684-3-alex.bennee@linaro.org>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-Id: <20220923084803.498337-3-clg@kaod.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/hw/core/cpu.h | 9 +++++++++
 cpu.c | 9 ++++-----
 2 files changed, 13 insertions(+), 5 deletions(-)
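As an illustration only (not part of the patch), the hot-path pattern this
enables looks roughly like the sketch below, using the existing
CPUClass::has_work hook as the example callee:

    static inline bool cpu_work_pending_example(CPUState *cpu)
    {
        /* Before: CPUClass *cc = CPU_GET_CLASS(cpu);  -- a checked cast. */
        /* After: use the pointer cached once in cpu_exec_realizefn(). */
        return cpu->cc->has_work && cpu->cc->has_work(cpu);
    }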

diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -XXX,XX +XXX,XX @@ typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size,
*/
#define CPU(obj) ((CPUState *)(obj))

+/*
+ * The class checkers bring in CPU_GET_CLASS() which is potentially
+ * expensive given the eventual call to
+ * object_class_dynamic_cast_assert(). Because of this the CPUState
+ * has a cached value for the class in cs->cc which is set up in
+ * cpu_exec_realizefn() for use in hot code paths.
+ */
typedef struct CPUClass CPUClass;
DECLARE_CLASS_CHECKERS(CPUClass, CPU,
TYPE_CPU)
@@ -XXX,XX +XXX,XX @@ struct qemu_work_item;
struct CPUState {
/*< private >*/
DeviceState parent_obj;
+ /* cache to avoid expensive CPU_GET_CLASS */
+ CPUClass *cc;
/*< public >*/

int nr_cores;
diff --git a/cpu.c b/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/cpu.c
+++ b/cpu.c
@@ -XXX,XX +XXX,XX @@ const VMStateDescription vmstate_cpu_common = {

void cpu_exec_realizefn(CPUState *cpu, Error **errp)
{
-#ifndef CONFIG_USER_ONLY
- CPUClass *cc = CPU_GET_CLASS(cpu);
-#endif
+ /* cache the cpu class for the hotpath */
+ cpu->cc = CPU_GET_CLASS(cpu);

cpu_list_add(cpu);
if (!accel_cpu_realizefn(cpu, errp)) {
@@ -XXX,XX +XXX,XX @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp)
if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
}
- if (cc->sysemu_ops->legacy_vmsd != NULL) {
- vmstate_register(NULL, cpu->cpu_index, cc->sysemu_ops->legacy_vmsd, cpu);
+ if (cpu->cc->sysemu_ops->legacy_vmsd != NULL) {
+ vmstate_register(NULL, cpu->cpu_index, cpu->cc->sysemu_ops->legacy_vmsd, cpu);
}
#endif /* CONFIG_USER_ONLY */
}
--
2.34.1
From: Alex Bennée <alex.bennee@linaro.org>

This is a heavily used function so let's avoid the cost of
CPU_GET_CLASS. On the romulus-bmc run it has a modest effect:

Before: 36.812 s ± 0.506 s
After: 35.912 s ± 0.168 s

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220811151413.3350684-4-alex.bennee@linaro.org>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-Id: <20220923084803.498337-4-clg@kaod.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 hw/core/cpu-sysemu.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/hw/core/cpu-sysemu.c b/hw/core/cpu-sysemu.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/core/cpu-sysemu.c
+++ b/hw/core/cpu-sysemu.c
@@ -XXX,XX +XXX,XX @@ hwaddr cpu_get_phys_page_debug(CPUState *cpu, vaddr addr)

int cpu_asidx_from_attrs(CPUState *cpu, MemTxAttrs attrs)
{
- CPUClass *cc = CPU_GET_CLASS(cpu);
int ret = 0;

- if (cc->sysemu_ops->asidx_from_attrs) {
- ret = cc->sysemu_ops->asidx_from_attrs(cpu, attrs);
+ if (cpu->cc->sysemu_ops->asidx_from_attrs) {
+ ret = cpu->cc->sysemu_ops->asidx_from_attrs(cpu, attrs);
assert(ret < cpu->num_ases && ret >= 0);
}
return ret;
--
2.34.1
From: Alex Bennée <alex.bennee@linaro.org>

Before: 35.912 s ± 0.168 s
After: 35.565 s ± 0.087 s

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220811151413.3350684-5-alex.bennee@linaro.org>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-Id: <20220923084803.498337-5-clg@kaod.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
{
- CPUClass *cc = CPU_GET_CLASS(cpu);
bool ok;

/*
* This is not a probe, so only valid return is success; failure
* should result in exception + longjmp to the cpu loop.
*/
- ok = cc->tcg_ops->tlb_fill(cpu, addr, size,
- access_type, mmu_idx, false, retaddr);
+ ok = cpu->cc->tcg_ops->tlb_fill(cpu, addr, size,
+ access_type, mmu_idx, false, retaddr);
assert(ok);
}

@@ -XXX,XX +XXX,XX @@ static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
MMUAccessType access_type,
int mmu_idx, uintptr_t retaddr)
{
- CPUClass *cc = CPU_GET_CLASS(cpu);
-
- cc->tcg_ops->do_unaligned_access(cpu, addr, access_type, mmu_idx, retaddr);
+ cpu->cc->tcg_ops->do_unaligned_access(cpu, addr, access_type,
+ mmu_idx, retaddr);
}

static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
if (!tlb_hit_page(tlb_addr, page_addr)) {
if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
CPUState *cs = env_cpu(env);
- CPUClass *cc = CPU_GET_CLASS(cs);

- if (!cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
- mmu_idx, nonfault, retaddr)) {
+ if (!cs->cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
+ mmu_idx, nonfault, retaddr)) {
/* Non-faulting page table read failed. */
*phost = NULL;
return TLB_INVALID_MASK;
--
2.34.1
This structure will shortly contain more than just
data for accessing MMIO. Rename the 'addr' member
to 'xlat_section' to more clearly indicate its purpose.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/cpu-defs.h | 22 ++++----
 accel/tcg/cputlb.c | 102 +++++++++++++++++++------------------
 target/arm/mte_helper.c | 14 ++---
 target/arm/sve_helper.c | 4 +-
 target/arm/translate-a64.c | 2 +-
 5 files changed, 73 insertions(+), 71 deletions(-)

diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -XXX,XX +XXX,XX @@ typedef uint64_t target_ulong;
# endif
# endif

+/* Minimalized TLB entry for use by TCG fast path. */
typedef struct CPUTLBEntry {
/* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address
bit TARGET_PAGE_BITS-1..4 : Nonzero for accesses that should not
@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBEntry {

QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS));

-/* The IOTLB is not accessed directly inline by generated TCG code,
- * so the CPUIOTLBEntry layout is not as critical as that of the
- * CPUTLBEntry. (This is also why we don't want to combine the two
- * structs into one.)
+/*
+ * The full TLB entry, which is not accessed by generated TCG code,
+ * so the layout is not as critical as that of CPUTLBEntry. This is
+ * also why we don't want to combine the two structs.
*/
-typedef struct CPUIOTLBEntry {
+typedef struct CPUTLBEntryFull {
/*
- * @addr contains:
+ * @xlat_section contains:
* - in the lower TARGET_PAGE_BITS, a physical section number
* - with the lower TARGET_PAGE_BITS masked off, an offset which
* must be added to the virtual address to obtain:
@@ -XXX,XX +XXX,XX @@ typedef struct CPUIOTLBEntry {
* number is PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM)
* + the offset within the target MemoryRegion (otherwise)
*/
- hwaddr addr;
+ hwaddr xlat_section;
MemTxAttrs attrs;
-} CPUIOTLBEntry;
+} CPUTLBEntryFull;

/*
* Data elements that are per MMU mode, minus the bits accessed by
@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBDesc {
size_t vindex;
/* The tlb victim table, in two parts. */
CPUTLBEntry vtable[CPU_VTLB_SIZE];
- CPUIOTLBEntry viotlb[CPU_VTLB_SIZE];
- /* The iotlb. */
- CPUIOTLBEntry *iotlb;
+ CPUTLBEntryFull vfulltlb[CPU_VTLB_SIZE];
+ CPUTLBEntryFull *fulltlb;
} CPUTLBDesc;

/*
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
}

g_free(fast->table);
- g_free(desc->iotlb);
+ g_free(desc->fulltlb);

tlb_window_reset(desc, now, 0);
/* desc->n_used_entries is cleared by the caller */
fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
fast->table = g_try_new(CPUTLBEntry, new_size);
- desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
+ desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size);

/*
* If the allocations fail, try smaller sizes. We just freed some
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
* allocations to fail though, so we progressively reduce the allocation
* size, aborting if we cannot even allocate the smallest TLB we support.
*/
- while (fast->table == NULL || desc->iotlb == NULL) {
+ while (fast->table == NULL || desc->fulltlb == NULL) {
if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
error_report("%s: %s", __func__, strerror(errno));
abort();
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;

g_free(fast->table);
- g_free(desc->iotlb);
+ g_free(desc->fulltlb);
fast->table = g_try_new(CPUTLBEntry, new_size);
- desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
+ desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size);
}
}

@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
desc->n_used_entries = 0;
fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
fast->table = g_new(CPUTLBEntry, n_entries);
- desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
+ desc->fulltlb = g_new(CPUTLBEntryFull, n_entries);
tlb_mmu_flush_locked(desc, fast);
}

@@ -XXX,XX +XXX,XX @@ void tlb_destroy(CPUState *cpu)
CPUTLBDescFast *fast = &env_tlb(env)->f[i];

g_free(fast->table);
- g_free(desc->iotlb);
+ g_free(desc->fulltlb);
}
}

@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,

/* Evict the old entry into the victim tlb. */
copy_tlb_helper_locked(tv, te);
- desc->viotlb[vidx] = desc->iotlb[index];
+ desc->vfulltlb[vidx] = desc->fulltlb[index];
tlb_n_used_entries_dec(env, mmu_idx);
}

@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
* subtract here is that of the page base, and not the same as the
* vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
*/
- desc->iotlb[index].addr = iotlb - vaddr_page;
- desc->iotlb[index].attrs = attrs;
+ desc->fulltlb[index].xlat_section = iotlb - vaddr_page;
+ desc->fulltlb[index].attrs = attrs;

/* Now calculate the new entry */
tn.addend = addend - vaddr_page;
@@ -XXX,XX +XXX,XX @@ static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
}
}

-static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
+static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
int mmu_idx, target_ulong addr, uintptr_t retaddr,
MMUAccessType access_type, MemOp op)
{
@@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
bool locked = false;
MemTxResult r;

- section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
+ section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
mr = section->mr;
- mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+ mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
cpu->mem_io_pc = retaddr;
if (!cpu->can_do_io) {
cpu_io_recompile(cpu, retaddr);
@@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
qemu_mutex_lock_iothread();
locked = true;
}
- r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
+ r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs);
if (r != MEMTX_OK) {
hwaddr physaddr = mr_offset +
section->offset_within_address_space -
section->offset_within_region;

cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
- mmu_idx, iotlbentry->attrs, r, retaddr);
+ mmu_idx, full->attrs, r, retaddr);
}
if (locked) {
qemu_mutex_unlock_iothread();
@@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
}

/*
- * Save a potentially trashed IOTLB entry for later lookup by plugin.
- * This is read by tlb_plugin_lookup if the iotlb entry doesn't match
+ * Save a potentially trashed CPUTLBEntryFull for later lookup by plugin.
+ * This is read by tlb_plugin_lookup if the fulltlb entry doesn't match
* because of the side effect of io_writex changing memory layout.
*/
static void save_iotlb_data(CPUState *cs, hwaddr addr,
@@ -XXX,XX +XXX,XX @@ static void save_iotlb_data(CPUState *cs, hwaddr addr,
#endif
}

-static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
+static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
int mmu_idx, uint64_t val, target_ulong addr,
uintptr_t retaddr, MemOp op)
{
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
bool locked = false;
MemTxResult r;

- section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
+ section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
mr = section->mr;
- mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+ mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
if (!cpu->can_do_io) {
cpu_io_recompile(cpu, retaddr);
}
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
* The memory_region_dispatch may trigger a flush/resize
* so for plugins we save the iotlb_data just in case.
*/
- save_iotlb_data(cpu, iotlbentry->addr, section, mr_offset);
+ save_iotlb_data(cpu, full->xlat_section, section, mr_offset);

if (!qemu_mutex_iothread_locked()) {
qemu_mutex_lock_iothread();
locked = true;
}
- r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
+ r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs);
if (r != MEMTX_OK) {
hwaddr physaddr = mr_offset +
section->offset_within_address_space -
section->offset_within_region;

cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
- MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
+ MMU_DATA_STORE, mmu_idx, full->attrs, r,
retaddr);
}
if (locked) {
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
copy_tlb_helper_locked(vtlb, &tmptlb);
qemu_spin_unlock(&env_tlb(env)->c.lock);

- CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
- CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
- tmpio = *io; *io = *vio; *vio = tmpio;
+ CPUTLBEntryFull *f1 = &env_tlb(env)->d[mmu_idx].fulltlb[index];
+ CPUTLBEntryFull *f2 = &env_tlb(env)->d[mmu_idx].vfulltlb[vidx];
+ CPUTLBEntryFull tmpf;
+ tmpf = *f1; *f1 = *f2; *f2 = tmpf;
return true;
}
}
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
(ADDR) & TARGET_PAGE_MASK)

static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
- CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
+ CPUTLBEntryFull *full, uintptr_t retaddr)
{
- ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
+ ram_addr_t ram_addr = mem_vaddr + full->xlat_section;

trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);

@@ -XXX,XX +XXX,XX @@ int probe_access_flags(CPUArchState *env, target_ulong addr,
/* Handle clean RAM pages. */
if (unlikely(flags & TLB_NOTDIRTY)) {
uintptr_t index = tlb_index(env, mmu_idx, addr);
- CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+ CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];

- notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
+ notdirty_write(env_cpu(env), addr, 1, full, retaddr);
flags &= ~TLB_NOTDIRTY;
}

@@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,

if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
uintptr_t index = tlb_index(env, mmu_idx, addr);
- CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+ CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];

/* Handle watchpoints. */
if (flags & TLB_WATCHPOINT) {
int wp_access = (access_type == MMU_DATA_STORE
? BP_MEM_WRITE : BP_MEM_READ);
cpu_check_watchpoint(env_cpu(env), addr, size,
- iotlbentry->attrs, wp_access, retaddr);
+ full->attrs, wp_access, retaddr);
}

/* Handle clean RAM pages. */
if (flags & TLB_NOTDIRTY) {
- notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
+ notdirty_write(env_cpu(env), addr, 1, full, retaddr);
}
}

@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
* should have just filled the TLB. The one corner case is io_writex
* which can cause TLB flushes and potential resizing of the TLBs
* losing the information we need. In those cases we need to recover
- * data from a copy of the iotlbentry. As long as this always occurs
+ * data from a copy of the CPUTLBEntryFull. As long as this always occurs
* from the same thread (which a mem callback will be) this is safe.
*/

@@ -XXX,XX +XXX,XX @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
if (likely(tlb_hit(tlb_addr, addr))) {
/* We must have an iotlb entry for MMIO */
if (tlb_addr & TLB_MMIO) {
- CPUIOTLBEntry *iotlbentry;
- iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+ CPUTLBEntryFull *full;
+ full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
data->is_io = true;
- data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
- data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+ data->v.io.section =
+ iotlb_to_section(cpu, full->xlat_section, full->attrs);
+ data->v.io.offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
} else {
data->is_io = false;
data->v.ram.hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,

if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
notdirty_write(env_cpu(env), addr, size,
- &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
+ &env_tlb(env)->d[mmu_idx].fulltlb[index], retaddr);
}

return hostaddr;
@@ -XXX,XX +XXX,XX @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,

/* Handle anything that isn't just a straight memory access. */
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
- CPUIOTLBEntry *iotlbentry;
+ CPUTLBEntryFull *full;
bool need_swap;

/* For anything that is unaligned, recurse through full_load. */
@@ -XXX,XX +XXX,XX @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
goto do_unaligned_access;
}

- iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+ full = &env_tlb(env)->d[mmu_idx].fulltlb[index];

/* Handle watchpoints. */
if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
/* On watchpoint hit, this will longjmp out. */
cpu_check_watchpoint(env_cpu(env), addr, size,
- iotlbentry->attrs, BP_MEM_READ, retaddr);
+ full->attrs, BP_MEM_READ, retaddr);
}

need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

/* Handle I/O access. */
if (likely(tlb_addr & TLB_MMIO)) {
- return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
+ return io_readx(env, full, mmu_idx, addr, retaddr,
access_type, op ^ (need_swap * MO_BSWAP));
}

@@ -XXX,XX +XXX,XX @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
*/
if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
cpu_check_watchpoint(env_cpu(env), addr, size - size2,
- env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
+ env_tlb(env)->d[mmu_idx].fulltlb[index].attrs,
BP_MEM_WRITE, retaddr);
}
if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
cpu_check_watchpoint(env_cpu(env), page2, size2,
- env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
+ env_tlb(env)->d[mmu_idx].fulltlb[index2].attrs,
BP_MEM_WRITE, retaddr);
}

@@ -XXX,XX +XXX,XX @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,

/* Handle anything that isn't just a straight memory access. */
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
- CPUIOTLBEntry *iotlbentry;
+ CPUTLBEntryFull *full;
bool need_swap;

/* For anything that is unaligned, recurse through byte stores. */
@@ -XXX,XX +XXX,XX @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
goto do_unaligned_access;
}

- iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+ full = &env_tlb(env)->d[mmu_idx].fulltlb[index];

/* Handle watchpoints. */
if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
/* On watchpoint hit, this will longjmp out. */
cpu_check_watchpoint(env_cpu(env), addr, size,
- iotlbentry->attrs, BP_MEM_WRITE, retaddr);
+ full->attrs, BP_MEM_WRITE, retaddr);
}

need_swap = size > 1 && (tlb_addr & TLB_BSWAP);

/* Handle I/O access. */
if (tlb_addr & TLB_MMIO) {
- io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
+ io_writex(env, full, mmu_idx, val, addr, retaddr,
op ^ (need_swap * MO_BSWAP));
return;
}
@@ -XXX,XX +XXX,XX @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,

/* Handle clean RAM pages. */
if (tlb_addr & TLB_NOTDIRTY) {
- notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
+ notdirty_write(env_cpu(env), addr, size, full, retaddr);
}

haddr = (void *)((uintptr_t)addr + entry->addend);
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
return tags + index;
#else
uintptr_t index;
- CPUIOTLBEntry *iotlbentry;
+ CPUTLBEntryFull *full;
int in_page, flags;
ram_addr_t ptr_ra;
hwaddr ptr_paddr, tag_paddr, xlat;
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
assert(!(flags & TLB_INVALID_MASK));

/*
- * Find the iotlbentry for ptr. This *must* be present in the TLB
+ * Find the CPUTLBEntryFull for ptr. This *must* be present in the TLB
* because we just found the mapping.
* TODO: Perhaps there should be a cputlb helper that returns a
* matching tlb entry + iotlb entry.
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
g_assert(tlb_hit(comparator, ptr));
}
# endif
- iotlbentry = &env_tlb(env)->d[ptr_mmu_idx].iotlb[index];
+ full = &env_tlb(env)->d[ptr_mmu_idx].fulltlb[index];

/* If the virtual page MemAttr != Tagged, access unchecked. */
- if (!arm_tlb_mte_tagged(&iotlbentry->attrs)) {
+ if (!arm_tlb_mte_tagged(&full->attrs)) {
return NULL;
}

@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
int wp = ptr_access == MMU_DATA_LOAD ? BP_MEM_READ : BP_MEM_WRITE;
assert(ra != 0);
cpu_check_watchpoint(env_cpu(env), ptr, ptr_size,
- iotlbentry->attrs, wp, ra);
+ full->attrs, wp, ra);
}

/*
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
tag_paddr = ptr_paddr >> (LOG2_TAG_GRANULE + 1);

/* Look up the address in tag space. */
- tag_asi = iotlbentry->attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS;
+ tag_asi = full->attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS;
tag_as = cpu_get_address_space(env_cpu(env), tag_asi);
mr = address_space_translate(tag_as, tag_paddr, &xlat, NULL,
tag_access == MMU_DATA_STORE,
- iotlbentry->attrs);
+ full->attrs);

/*
* Note that @mr will never be NULL. If there is nothing in the address
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -XXX,XX +XXX,XX @@ bool sve_probe_page(SVEHostPage *info, bool nofault, CPUARMState *env,
g_assert(tlb_hit(comparator, addr));
# endif

- CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
- info->attrs = iotlbentry->attrs;
+ CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
+ info->attrs = full->attrs;
}
#endif

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static bool is_guarded_page(CPUARMState *env, DisasContext *s)
* table entry even for that case.
*/
return (tlb_hit(entry->addr_code, addr) &&
- arm_tlb_bti_gp(&env_tlb(env)->d[mmu_idx].iotlb[index].attrs));
+ arm_tlb_bti_gp(&env_tlb(env)->d[mmu_idx].fulltlb[index].attrs));
#endif
}
--
2.34.1
This field is only written, not read; remove it.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/hw/core/cpu.h | 1 -
 accel/tcg/cputlb.c | 7 +++----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -XXX,XX +XXX,XX @@ struct CPUWatchpoint {
* the memory regions get moved around by io_writex.
*/
typedef struct SavedIOTLB {
- hwaddr addr;
MemoryRegionSection *section;
hwaddr mr_offset;
} SavedIOTLB;
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
* This is read by tlb_plugin_lookup if the fulltlb entry doesn't match
* because of the side effect of io_writex changing memory layout.
*/
-static void save_iotlb_data(CPUState *cs, hwaddr addr,
- MemoryRegionSection *section, hwaddr mr_offset)
+static void save_iotlb_data(CPUState *cs, MemoryRegionSection *section,
+ hwaddr mr_offset)
{
#ifdef CONFIG_PLUGIN
SavedIOTLB *saved = &cs->saved_iotlb;
- saved->addr = addr;
saved->section = section;
saved->mr_offset = mr_offset;
#endif
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
* The memory_region_dispatch may trigger a flush/resize
* so for plugins we save the iotlb_data just in case.
*/
- save_iotlb_data(cpu, full->xlat_section, section, mr_offset);
+ save_iotlb_data(cpu, section, mr_offset);

if (!qemu_mutex_iothread_locked()) {
qemu_mutex_lock_iothread();
--
2.34.1
When PAGE_WRITE_INV is set when calling tlb_set_page,
we immediately set TLB_INVALID_MASK in order to force
tlb_fill to be called on the next lookup. Here in
probe_access_internal, we have just called tlb_fill
and eliminated true misses, thus the lookup must be valid.

This allows us to remove a warning comment from s390x.
There doesn't seem to be a reason to change the code though.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 10 +++++++++-
 target/s390x/tcg/mem_helper.c | 4 ----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
}
tlb_addr = tlb_read_ofs(entry, elt_ofs);

+ flags = TLB_FLAGS_MASK;
page_addr = addr & TARGET_PAGE_MASK;
if (!tlb_hit_page(tlb_addr, page_addr)) {
if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,

/* TLB resize via tlb_fill may have moved the entry. */
entry = tlb_entry(env, mmu_idx, addr);
+
+ /*
+ * With PAGE_WRITE_INV, we set TLB_INVALID_MASK immediately,
+ * to force the next access through tlb_fill. We've just
+ * called tlb_fill, so we know that this entry *is* valid.
+ */
+ flags &= ~TLB_INVALID_MASK;
}
tlb_addr = tlb_read_ofs(entry, elt_ofs);
}
- flags = tlb_addr & TLB_FLAGS_MASK;
+ flags &= tlb_addr;

/* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */
if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -XXX,XX +XXX,XX @@ static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
#else
int flags;

- /*
- * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
- * to detect if there was an exception during tlb_fill().
- */
env->tlb_fill_exc = 0;
flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
ra);
--
2.34.1
Add an interface to return the CPUTLBEntryFull struct
that goes with the lookup. The result is not intended
to be valid across multiple lookups, so the user must
use the results immediately.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/exec-all.h | 15 +++++++++++
 include/qemu/typedefs.h | 1 +
 accel/tcg/cputlb.c | 47 +++++++++++++++++++++++++----------------
 3 files changed, 45 insertions(+), 18 deletions(-)
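As a usage sketch only (not part of this patch, and assuming only the
interface documented below), a caller would probe once and consume the
returned CPUTLBEntryFull before any further TLB activity:

    static bool example_page_attrs(CPUArchState *env, target_ulong addr,
                                   int mmu_idx, uintptr_t ra, MemTxAttrs *out)
    {
        CPUTLBEntryFull *full;
        void *host;
        int flags = probe_access_full(env, addr, MMU_DATA_LOAD, mmu_idx,
                                      true, &host, &full, ra);

        if (flags & TLB_INVALID_MASK) {
            return false;
        }
        /* Copy out what is needed now; @full is transient. */
        *out = full->attrs;
        return true;
    }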

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -XXX,XX +XXX,XX @@ int probe_access_flags(CPUArchState *env, target_ulong addr,
MMUAccessType access_type, int mmu_idx,
bool nonfault, void **phost, uintptr_t retaddr);

+#ifndef CONFIG_USER_ONLY
+/**
+ * probe_access_full:
+ * Like probe_access_flags, except also return into @pfull.
+ *
+ * The CPUTLBEntryFull structure returned via @pfull is transient
+ * and must be consumed or copied immediately, before any further
+ * access or changes to TLB @mmu_idx.
+ */
+int probe_access_full(CPUArchState *env, target_ulong addr,
+ MMUAccessType access_type, int mmu_idx,
+ bool nonfault, void **phost,
+ CPUTLBEntryFull **pfull, uintptr_t retaddr);
+#endif
+
#define CODE_GEN_ALIGN 16 /* must be >= of the size of a icache line */

/* Estimated block size for TB allocation. */
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -XXX,XX +XXX,XX @@ typedef struct ConfidentialGuestSupport ConfidentialGuestSupport;
typedef struct CPUAddressSpace CPUAddressSpace;
typedef struct CPUArchState CPUArchState;
typedef struct CPUState CPUState;
+typedef struct CPUTLBEntryFull CPUTLBEntryFull;
typedef struct DeviceListener DeviceListener;
typedef struct DeviceState DeviceState;
typedef struct DirtyBitmapSnapshot DirtyBitmapSnapshot;
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
static int probe_access_internal(CPUArchState *env, target_ulong addr,
int fault_size, MMUAccessType access_type,
int mmu_idx, bool nonfault,
- void **phost, uintptr_t retaddr)
+ void **phost, CPUTLBEntryFull **pfull,
+ uintptr_t retaddr)
{
uintptr_t index = tlb_index(env, mmu_idx, addr);
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
mmu_idx, nonfault, retaddr)) {
/* Non-faulting page table read failed. */
*phost = NULL;
+ *pfull = NULL;
return TLB_INVALID_MASK;
}

/* TLB resize via tlb_fill may have moved the entry. */
+ index = tlb_index(env, mmu_idx, addr);
entry = tlb_entry(env, mmu_idx, addr);

/*
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
}
flags &= tlb_addr;

+ *pfull = &env_tlb(env)->d[mmu_idx].fulltlb[index];
+
/* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */
if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
*phost = NULL;
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
return flags;
}

-int probe_access_flags(CPUArchState *env, target_ulong addr,
- MMUAccessType access_type, int mmu_idx,
- bool nonfault, void **phost, uintptr_t retaddr)
+int probe_access_full(CPUArchState *env, target_ulong addr,
+ MMUAccessType access_type, int mmu_idx,
+ bool nonfault, void **phost, CPUTLBEntryFull **pfull,
+ uintptr_t retaddr)
{
- int flags;
-
- flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
- nonfault, phost, retaddr);
+ int flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
+ nonfault, phost, pfull, retaddr);

/* Handle clean RAM pages. */
if (unlikely(flags & TLB_NOTDIRTY)) {
- uintptr_t index = tlb_index(env, mmu_idx, addr);
- CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
-
- notdirty_write(env_cpu(env), addr, 1, full, retaddr);
+ notdirty_write(env_cpu(env), addr, 1, *pfull, retaddr);
flags &= ~TLB_NOTDIRTY;
}

return flags;
}

+int probe_access_flags(CPUArchState *env, target_ulong addr,
+ MMUAccessType access_type, int mmu_idx,
+ bool nonfault, void **phost, uintptr_t retaddr)
+{
+ CPUTLBEntryFull *full;
+
+ return probe_access_full(env, addr, access_type, mmu_idx,
+ nonfault, phost, &full, retaddr);
+}
+
void *probe_access(CPUArchState *env, target_ulong addr, int size,
MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
{
+ CPUTLBEntryFull *full;
void *host;
int flags;

g_assert(-(addr | TARGET_PAGE_MASK) >= size);

flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
- false, &host, retaddr);
+ false, &host, &full, retaddr);

/* Per the interface, size == 0 merely faults the access. */
if (size == 0) {
@@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
}

if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
- uintptr_t index = tlb_index(env, mmu_idx, addr);
- CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
-
/* Handle watchpoints. */
if (flags & TLB_WATCHPOINT) {
int wp_access = (access_type == MMU_DATA_STORE
@@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
MMUAccessType access_type, int mmu_idx)
{
+ CPUTLBEntryFull *full;
void *host;
int flags;

flags = probe_access_internal(env, addr, 0, access_type,
- mmu_idx, true, &host, 0);
+ mmu_idx, true, &host, &full, 0);

/* No combination of flags are expected by the caller. */
return flags ? NULL : host;
@@ -XXX,XX +XXX,XX @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
void **hostp)
{
+ CPUTLBEntryFull *full;
void *p;

(void)probe_access_internal(env, addr, 1, MMU_INST_FETCH,
- cpu_mmu_index(env, true), false, &p, 0);
+ cpu_mmu_index(env, true), false, &p, &full, 0);
if (p == NULL) {
return -1;
}
--
2.34.1
This consists of the three immediate shifts: shli, shri, sari.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/arm/tcg-target.h | 2 +-
 tcg/arm/tcg-target.c.inc | 27 +++++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

Now that we have collected all of the page data into
CPUTLBEntryFull, provide an interface to record that
all in one go, instead of using 4 arguments. This interface
allows CPUTLBEntryFull to be extended without having to
change the number of arguments.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/cpu-defs.h | 14 +++++++++++
 include/exec/exec-all.h | 22 ++++++++++++++++++
 accel/tcg/cputlb.c | 51 ++++++++++++++++++++++++++---------------
 3 files changed, 69 insertions(+), 18 deletions(-)

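For illustration only (this sketch is not part of the series, and the function and argument names here are invented), a target's tlb_fill hook would collect the result of its page table walk into a CPUTLBEntryFull and install it with the new call, much as the reworked tlb_set_page_with_attrs in the cputlb.c hunk below does:

/* Hypothetical sketch, assuming the usual QEMU softmmu types and headers. */
static void example_tlb_fill_finish(CPUState *cs, int mmu_idx,
                                    target_ulong vaddr, hwaddr paddr,
                                    MemTxAttrs attrs, int prot)
{
    CPUTLBEntryFull full = {
        .phys_addr    = paddr,
        .attrs        = attrs,
        .prot         = prot,
        .lg_page_size = TARGET_PAGE_BITS,   /* one normal-sized page */
    };

    tlb_set_page_full(cs, mmu_idx, vaddr, &full);
}
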
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
17
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
11
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/arm/tcg-target.h
19
--- a/include/exec/cpu-defs.h
13
+++ b/tcg/arm/tcg-target.h
20
+++ b/include/exec/cpu-defs.h
14
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
21
@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBEntryFull {
15
#define TCG_TARGET_HAS_roti_vec 0
22
* + the offset within the target MemoryRegion (otherwise)
16
#define TCG_TARGET_HAS_rots_vec 0
23
*/
17
#define TCG_TARGET_HAS_rotv_vec 0
24
hwaddr xlat_section;
18
-#define TCG_TARGET_HAS_shi_vec 0
25
+
19
+#define TCG_TARGET_HAS_shi_vec 1
26
+ /*
20
#define TCG_TARGET_HAS_shs_vec 0
27
+ * @phys_addr contains the physical address in the address space
21
#define TCG_TARGET_HAS_shv_vec 0
28
+ * given by cpu_asidx_from_attrs(cpu, @attrs).
22
#define TCG_TARGET_HAS_mul_vec 0
29
+ */
23
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
30
+ hwaddr phys_addr;
31
+
32
+ /* @attrs contains the memory transaction attributes for the page. */
33
MemTxAttrs attrs;
34
+
35
+ /* @prot contains the complete protections for the page. */
36
+ uint8_t prot;
37
+
38
+ /* @lg_page_size contains the log2 of the page size. */
39
+ uint8_t lg_page_size;
40
} CPUTLBEntryFull;
41
42
/*
43
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
24
index XXXXXXX..XXXXXXX 100644
44
index XXXXXXX..XXXXXXX 100644
25
--- a/tcg/arm/tcg-target.c.inc
45
--- a/include/exec/exec-all.h
26
+++ b/tcg/arm/tcg-target.c.inc
46
+++ b/include/exec/exec-all.h
27
@@ -XXX,XX +XXX,XX @@ typedef enum {
47
@@ -XXX,XX +XXX,XX @@ void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
28
INSN_VCGE_U = 0xf3000310,
48
uint16_t idxmap,
29
INSN_VCGT_U = 0xf3000300,
49
unsigned bits);
30
50
31
+ INSN_VSHLI = 0xf2800510, /* VSHL (immediate) */
51
+/**
32
+ INSN_VSARI = 0xf2800010, /* VSHR.S */
52
+ * tlb_set_page_full:
33
+ INSN_VSHRI = 0xf3800010, /* VSHR.U */
53
+ * @cpu: CPU context
54
+ * @mmu_idx: mmu index of the tlb to modify
55
+ * @vaddr: virtual address of the entry to add
56
+ * @full: the details of the tlb entry
57
+ *
58
+ * Add an entry to @cpu tlb index @mmu_idx. All of the fields of
59
+ * @full must be filled, except for xlat_section, and constitute
60
+ * the complete description of the translated page.
61
+ *
62
+ * This is generally called by the target tlb_fill function after
63
+ * having performed a successful page table walk to find the physical
64
+ * address and attributes for the translation.
65
+ *
66
+ * At most one entry for a given virtual address is permitted. Only a
67
+ * single TARGET_PAGE_SIZE region is mapped; @full->lg_page_size is only
68
+ * used by tlb_flush_page.
69
+ */
70
+void tlb_set_page_full(CPUState *cpu, int mmu_idx, target_ulong vaddr,
71
+ CPUTLBEntryFull *full);
34
+
72
+
35
INSN_VTST = 0xf2000810,
73
/**
36
74
* tlb_set_page_with_attrs:
37
INSN_VDUP_G = 0xee800b10, /* VDUP (ARM core register) */
75
* @cpu: CPU to add this TLB entry for
38
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vmovi(TCGContext *s, TCGReg rd,
76
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
39
| (extract32(imm8, 7, 1) << 24));
77
index XXXXXXX..XXXXXXX 100644
78
--- a/accel/tcg/cputlb.c
79
+++ b/accel/tcg/cputlb.c
80
@@ -XXX,XX +XXX,XX @@ static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
81
env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
40
}
82
}
41
83
42
+static void tcg_out_vshifti(TCGContext *s, ARMInsn insn, int q,
84
-/* Add a new TLB entry. At most one entry for a given virtual address
43
+ TCGReg rd, TCGReg rm, int l_imm6)
85
+/*
86
+ * Add a new TLB entry. At most one entry for a given virtual address
87
* is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
88
* supplied size is only used by tlb_flush_page.
89
*
90
* Called from TCG-generated code, which is under an RCU read-side
91
* critical section.
92
*/
93
-void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
94
- hwaddr paddr, MemTxAttrs attrs, int prot,
95
- int mmu_idx, target_ulong size)
96
+void tlb_set_page_full(CPUState *cpu, int mmu_idx,
97
+ target_ulong vaddr, CPUTLBEntryFull *full)
98
{
99
CPUArchState *env = cpu->env_ptr;
100
CPUTLB *tlb = env_tlb(env);
101
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
102
CPUTLBEntry *te, tn;
103
hwaddr iotlb, xlat, sz, paddr_page;
104
target_ulong vaddr_page;
105
- int asidx = cpu_asidx_from_attrs(cpu, attrs);
106
- int wp_flags;
107
+ int asidx, wp_flags, prot;
108
bool is_ram, is_romd;
109
110
assert_cpu_is_self(cpu);
111
112
- if (size <= TARGET_PAGE_SIZE) {
113
+ if (full->lg_page_size <= TARGET_PAGE_BITS) {
114
sz = TARGET_PAGE_SIZE;
115
} else {
116
- tlb_add_large_page(env, mmu_idx, vaddr, size);
117
- sz = size;
118
+ sz = (hwaddr)1 << full->lg_page_size;
119
+ tlb_add_large_page(env, mmu_idx, vaddr, sz);
120
}
121
vaddr_page = vaddr & TARGET_PAGE_MASK;
122
- paddr_page = paddr & TARGET_PAGE_MASK;
123
+ paddr_page = full->phys_addr & TARGET_PAGE_MASK;
124
125
+ prot = full->prot;
126
+ asidx = cpu_asidx_from_attrs(cpu, full->attrs);
127
section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
128
- &xlat, &sz, attrs, &prot);
129
+ &xlat, &sz, full->attrs, &prot);
130
assert(sz >= TARGET_PAGE_SIZE);
131
132
tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
133
" prot=%x idx=%d\n",
134
- vaddr, paddr, prot, mmu_idx);
135
+ vaddr, full->phys_addr, prot, mmu_idx);
136
137
address = vaddr_page;
138
- if (size < TARGET_PAGE_SIZE) {
139
+ if (full->lg_page_size < TARGET_PAGE_BITS) {
140
/* Repeat the MMU check and TLB fill on every access. */
141
address |= TLB_INVALID_MASK;
142
}
143
- if (attrs.byte_swap) {
144
+ if (full->attrs.byte_swap) {
145
address |= TLB_BSWAP;
146
}
147
148
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
149
* subtract here is that of the page base, and not the same as the
150
* vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
151
*/
152
+ desc->fulltlb[index] = *full;
153
desc->fulltlb[index].xlat_section = iotlb - vaddr_page;
154
- desc->fulltlb[index].attrs = attrs;
155
+ desc->fulltlb[index].phys_addr = paddr_page;
156
+ desc->fulltlb[index].prot = prot;
157
158
/* Now calculate the new entry */
159
tn.addend = addend - vaddr_page;
160
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
161
qemu_spin_unlock(&tlb->c.lock);
162
}
163
164
-/* Add a new TLB entry, but without specifying the memory
165
- * transaction attributes to be used.
166
- */
167
+void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
168
+ hwaddr paddr, MemTxAttrs attrs, int prot,
169
+ int mmu_idx, target_ulong size)
44
+{
170
+{
45
+ tcg_out32(s, insn | (q << 6) | encode_vd(rd) | encode_vm(rm) |
171
+ CPUTLBEntryFull full = {
46
+ (extract32(l_imm6, 6, 1) << 7) |
172
+ .phys_addr = paddr,
47
+ (extract32(l_imm6, 0, 6) << 16));
173
+ .attrs = attrs,
174
+ .prot = prot,
175
+ .lg_page_size = ctz64(size)
176
+ };
177
+
178
+ assert(is_power_of_2(size));
179
+ tlb_set_page_full(cpu, mmu_idx, vaddr, &full);
48
+}
180
+}
49
+
181
+
50
static void tcg_out_vldst(TCGContext *s, ARMInsn insn,
182
void tlb_set_page(CPUState *cpu, target_ulong vaddr,
51
TCGReg rd, TCGReg rn, int offset)
183
hwaddr paddr, int prot,
52
{
184
int mmu_idx, target_ulong size)
53
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
54
case INDEX_op_abs_vec:
55
case INDEX_op_neg_vec:
56
case INDEX_op_not_vec:
57
+ case INDEX_op_shli_vec:
58
+ case INDEX_op_shri_vec:
59
+ case INDEX_op_sari_vec:
60
return C_O1_I1(w, w);
61
case INDEX_op_dup2_vec:
62
case INDEX_op_add_vec:
63
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
64
case INDEX_op_xor_vec:
65
tcg_out_vreg3(s, INSN_VEOR, q, 0, a0, a1, a2);
66
return;
67
+ case INDEX_op_shli_vec:
68
+ tcg_out_vshifti(s, INSN_VSHLI, q, a0, a1, a2 + (8 << vece));
69
+ return;
70
+ case INDEX_op_shri_vec:
71
+ tcg_out_vshifti(s, INSN_VSHRI, q, a0, a1, (16 << vece) - a2);
72
+ return;
73
+ case INDEX_op_sari_vec:
74
+ tcg_out_vshifti(s, INSN_VSARI, q, a0, a1, (16 << vece) - a2);
75
+ return;
76
77
case INDEX_op_andc_vec:
78
if (!const_args[2]) {
79
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
80
case INDEX_op_orc_vec:
81
case INDEX_op_xor_vec:
82
case INDEX_op_not_vec:
83
+ case INDEX_op_shli_vec:
84
+ case INDEX_op_shri_vec:
85
+ case INDEX_op_sari_vec:
86
return 1;
87
case INDEX_op_abs_vec:
88
case INDEX_op_cmp_vec:
89
--
185
--
90
2.25.1
186
2.34.1
91
187
92
188
1
This is saturating add and subtract, signed and unsigned.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/arm/tcg-target.h | 2 +-
 tcg/arm/tcg-target.c.inc | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

Allow the target to cache items from the guest page tables.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/cpu-defs.h | 9 +++++++++
 1 file changed, 9 insertions(+)
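As a purely hypothetical illustration (not taken from this series, and the field names are invented), a target opts in from its cpu-param.h; the macro body is pasted verbatim into CPUTLBEntryFull, so it must be a plain list of member declarations:

/* In a target's cpu-param.h; example fields only. */
#define TARGET_PAGE_ENTRY_EXTRA \
    uint64_t example_pte;       \
    bool example_guarded;
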
9
10
10
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
11
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
11
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/arm/tcg-target.h
13
--- a/include/exec/cpu-defs.h
13
+++ b/tcg/arm/tcg-target.h
14
+++ b/include/exec/cpu-defs.h
14
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
15
@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBEntryFull {
15
#define TCG_TARGET_HAS_shs_vec 0
16
16
#define TCG_TARGET_HAS_shv_vec 0
17
/* @lg_page_size contains the log2 of the page size. */
17
#define TCG_TARGET_HAS_mul_vec 1
18
uint8_t lg_page_size;
18
-#define TCG_TARGET_HAS_sat_vec 0
19
+
19
+#define TCG_TARGET_HAS_sat_vec 1
20
+ /*
20
#define TCG_TARGET_HAS_minmax_vec 0
21
+ * Allow target-specific additions to this structure.
21
#define TCG_TARGET_HAS_bitsel_vec 0
22
+ * This may be used to cache items from the guest cpu
22
#define TCG_TARGET_HAS_cmpsel_vec 0
23
+ * page tables for later use by the implementation.
23
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
24
+ */
24
index XXXXXXX..XXXXXXX 100644
25
+#ifdef TARGET_PAGE_ENTRY_EXTRA
25
--- a/tcg/arm/tcg-target.c.inc
26
+ TARGET_PAGE_ENTRY_EXTRA
26
+++ b/tcg/arm/tcg-target.c.inc
27
+#endif
27
@@ -XXX,XX +XXX,XX @@ typedef enum {
28
} CPUTLBEntryFull;
28
INSN_VORR = 0xf2200110,
29
29
INSN_VSUB = 0xf3000800,
30
/*
30
INSN_VMUL = 0xf2000910,
31
+ INSN_VQADD = 0xf2000010,
32
+ INSN_VQADD_U = 0xf3000010,
33
+ INSN_VQSUB = 0xf2000210,
34
+ INSN_VQSUB_U = 0xf3000210,
35
36
INSN_VABS = 0xf3b10300,
37
INSN_VMVN = 0xf3b00580,
38
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
39
case INDEX_op_dup2_vec:
40
case INDEX_op_add_vec:
41
case INDEX_op_mul_vec:
42
+ case INDEX_op_ssadd_vec:
43
+ case INDEX_op_sssub_vec:
44
case INDEX_op_sub_vec:
45
+ case INDEX_op_usadd_vec:
46
+ case INDEX_op_ussub_vec:
47
case INDEX_op_xor_vec:
48
return C_O1_I2(w, w, w);
49
case INDEX_op_or_vec:
50
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
51
case INDEX_op_sub_vec:
52
tcg_out_vreg3(s, INSN_VSUB, q, vece, a0, a1, a2);
53
return;
54
+ case INDEX_op_ssadd_vec:
55
+ tcg_out_vreg3(s, INSN_VQADD, q, vece, a0, a1, a2);
56
+ return;
57
+ case INDEX_op_sssub_vec:
58
+ tcg_out_vreg3(s, INSN_VQSUB, q, vece, a0, a1, a2);
59
+ return;
60
+ case INDEX_op_usadd_vec:
61
+ tcg_out_vreg3(s, INSN_VQADD_U, q, vece, a0, a1, a2);
62
+ return;
63
+ case INDEX_op_ussub_vec:
64
+ tcg_out_vreg3(s, INSN_VQSUB_U, q, vece, a0, a1, a2);
65
+ return;
66
case INDEX_op_xor_vec:
67
tcg_out_vreg3(s, INSN_VEOR, q, 0, a0, a1, a2);
68
return;
69
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
70
case INDEX_op_shli_vec:
71
case INDEX_op_shri_vec:
72
case INDEX_op_sari_vec:
73
+ case INDEX_op_ssadd_vec:
74
+ case INDEX_op_sssub_vec:
75
+ case INDEX_op_usadd_vec:
76
+ case INDEX_op_ussub_vec:
77
return 1;
78
case INDEX_op_abs_vec:
79
case INDEX_op_cmp_vec:
80
--
31
--
81
2.25.1
32
2.34.1
82
33
83
34
1
This bitmap is created and discarded immediately.
We gain nothing by its existence.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220822232338.1727934-2-richard.henderson@linaro.org>
---
 accel/tcg/translate-all.c | 78 ++-------------------------------------
 1 file changed, 4 insertions(+), 74 deletions(-)
10
11
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/translate-all.c
14
+++ b/accel/tcg/translate-all.c
15
@@ -XXX,XX +XXX,XX @@
16
#define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
17
#endif
18
19
-#define SMC_BITMAP_USE_THRESHOLD 10
20
-
21
typedef struct PageDesc {
22
/* list of TBs intersecting this ram page */
23
uintptr_t first_tb;
24
-#ifdef CONFIG_SOFTMMU
25
- /* in order to optimize self modifying code, we count the number
26
- of lookups we do to a given page to use a bitmap */
27
- unsigned long *code_bitmap;
28
- unsigned int code_write_count;
29
-#else
30
+#ifdef CONFIG_USER_ONLY
31
unsigned long flags;
32
void *target_data;
33
#endif
34
-#ifndef CONFIG_USER_ONLY
35
+#ifdef CONFIG_SOFTMMU
36
QemuSpin lock;
37
#endif
38
} PageDesc;
39
@@ -XXX,XX +XXX,XX @@ void tb_htable_init(void)
40
qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
41
}
42
43
-/* call with @p->lock held */
44
-static inline void invalidate_page_bitmap(PageDesc *p)
45
-{
46
- assert_page_locked(p);
47
-#ifdef CONFIG_SOFTMMU
48
- g_free(p->code_bitmap);
49
- p->code_bitmap = NULL;
50
- p->code_write_count = 0;
51
-#endif
52
-}
53
-
54
/* Set to NULL all the 'first_tb' fields in all PageDescs. */
55
static void page_flush_tb_1(int level, void **lp)
56
{
57
@@ -XXX,XX +XXX,XX @@ static void page_flush_tb_1(int level, void **lp)
58
for (i = 0; i < V_L2_SIZE; ++i) {
59
page_lock(&pd[i]);
60
pd[i].first_tb = (uintptr_t)NULL;
61
- invalidate_page_bitmap(pd + i);
62
page_unlock(&pd[i]);
63
}
64
} else {
65
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
66
if (rm_from_page_list) {
67
p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
68
tb_page_remove(p, tb);
69
- invalidate_page_bitmap(p);
70
if (tb->page_addr[1] != -1) {
71
p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
72
tb_page_remove(p, tb);
73
- invalidate_page_bitmap(p);
74
}
75
}
76
77
@@ -XXX,XX +XXX,XX @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
78
}
79
}
80
81
-#ifdef CONFIG_SOFTMMU
82
-/* call with @p->lock held */
83
-static void build_page_bitmap(PageDesc *p)
84
-{
85
- int n, tb_start, tb_end;
86
- TranslationBlock *tb;
87
-
88
- assert_page_locked(p);
89
- p->code_bitmap = bitmap_new(TARGET_PAGE_SIZE);
90
-
91
- PAGE_FOR_EACH_TB(p, tb, n) {
92
- /* NOTE: this is subtle as a TB may span two physical pages */
93
- if (n == 0) {
94
- /* NOTE: tb_end may be after the end of the page, but
95
- it is not a problem */
96
- tb_start = tb->pc & ~TARGET_PAGE_MASK;
97
- tb_end = tb_start + tb->size;
98
- if (tb_end > TARGET_PAGE_SIZE) {
99
- tb_end = TARGET_PAGE_SIZE;
100
- }
101
- } else {
102
- tb_start = 0;
103
- tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
104
- }
105
- bitmap_set(p->code_bitmap, tb_start, tb_end - tb_start);
106
- }
107
-}
108
-#endif
109
-
110
/* add the tb in the target page and protect it if necessary
111
*
112
* Called with mmap_lock held for user-mode emulation.
113
@@ -XXX,XX +XXX,XX @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
114
page_already_protected = p->first_tb != (uintptr_t)NULL;
115
#endif
116
p->first_tb = (uintptr_t)tb | n;
117
- invalidate_page_bitmap(p);
118
119
#if defined(CONFIG_USER_ONLY)
120
/* translator_loop() must have made all TB pages non-writable */
121
@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
122
/* remove TB from the page(s) if we couldn't insert it */
123
if (unlikely(existing_tb)) {
124
tb_page_remove(p, tb);
125
- invalidate_page_bitmap(p);
126
if (p2) {
127
tb_page_remove(p2, tb);
128
- invalidate_page_bitmap(p2);
129
}
130
tb = existing_tb;
131
}
132
@@ -XXX,XX +XXX,XX @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
133
#if !defined(CONFIG_USER_ONLY)
134
/* if no code remaining, no need to continue to use slow writes */
135
if (!p->first_tb) {
136
- invalidate_page_bitmap(p);
137
tlb_unprotect_code(start);
138
}
139
#endif
140
@@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_page_fast(struct page_collection *pages,
141
}
142
143
assert_page_locked(p);
144
- if (!p->code_bitmap &&
145
- ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
146
- build_page_bitmap(p);
147
- }
148
- if (p->code_bitmap) {
149
- unsigned int nr;
150
- unsigned long b;
151
-
152
- nr = start & ~TARGET_PAGE_MASK;
153
- b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1));
154
- if (b & ((1 << len) - 1)) {
155
- goto do_invalidate;
156
- }
157
- } else {
158
- do_invalidate:
159
- tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
160
- retaddr);
161
- }
162
+ tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
163
+ retaddr);
164
}
165
#else
166
/* Called with mmap_lock held. If pc is not 0 then it indicates the
167
--
168
2.34.1
169
170
1
Implementing dup2, add, sub, and, or, xor as the minimal set.
1
Bool is more appropriate type for the alloc parameter.
2
This allows us to actually enable neon in the header file.
3
2
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
6
---
7
tcg/arm/tcg-target-con-set.h | 3 +
7
accel/tcg/translate-all.c | 14 +++++++-------
8
tcg/arm/tcg-target-con-str.h | 2 +
8
1 file changed, 7 insertions(+), 7 deletions(-)
9
tcg/arm/tcg-target.h | 6 +-
10
tcg/arm/tcg-target.c.inc | 201 +++++++++++++++++++++++++++++++++--
11
4 files changed, 204 insertions(+), 8 deletions(-)
12
9
13
diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h
10
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
14
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/arm/tcg-target-con-set.h
12
--- a/accel/tcg/translate-all.c
16
+++ b/tcg/arm/tcg-target-con-set.h
13
+++ b/accel/tcg/translate-all.c
17
@@ -XXX,XX +XXX,XX @@ C_O1_I2(r, r, rIN)
14
@@ -XXX,XX +XXX,XX @@ void page_init(void)
18
C_O1_I2(r, r, ri)
19
C_O1_I2(r, rZ, rZ)
20
C_O1_I2(w, w, w)
21
+C_O1_I2(w, w, wO)
22
+C_O1_I2(w, w, wV)
23
+C_O1_I2(w, w, wZ)
24
C_O1_I4(r, r, r, rI, rI)
25
C_O1_I4(r, r, rIN, rIK, 0)
26
C_O2_I1(r, r, l)
27
diff --git a/tcg/arm/tcg-target-con-str.h b/tcg/arm/tcg-target-con-str.h
28
index XXXXXXX..XXXXXXX 100644
29
--- a/tcg/arm/tcg-target-con-str.h
30
+++ b/tcg/arm/tcg-target-con-str.h
31
@@ -XXX,XX +XXX,XX @@ REGS('w', ALL_VECTOR_REGS)
32
CONST('I', TCG_CT_CONST_ARM)
33
CONST('K', TCG_CT_CONST_INV)
34
CONST('N', TCG_CT_CONST_NEG)
35
+CONST('O', TCG_CT_CONST_ORRI)
36
+CONST('V', TCG_CT_CONST_ANDI)
37
CONST('Z', TCG_CT_CONST_ZERO)
38
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
39
index XXXXXXX..XXXXXXX 100644
40
--- a/tcg/arm/tcg-target.h
41
+++ b/tcg/arm/tcg-target.h
42
@@ -XXX,XX +XXX,XX @@ typedef enum {
43
#else
44
extern bool use_idiv_instructions;
45
#endif
15
#endif
46
-#define use_neon_instructions 0
47
+#ifdef __ARM_NEON__
48
+#define use_neon_instructions 1
49
+#else
50
+extern bool use_neon_instructions;
51
+#endif
52
53
/* used for function call generation */
54
#define TCG_TARGET_STACK_ALIGN        8
55
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
56
index XXXXXXX..XXXXXXX 100644
57
--- a/tcg/arm/tcg-target.c.inc
58
+++ b/tcg/arm/tcg-target.c.inc
59
@@ -XXX,XX +XXX,XX @@ int arm_arch = __ARM_ARCH;
60
#ifndef use_idiv_instructions
61
bool use_idiv_instructions;
62
#endif
63
+#ifndef use_neon_instructions
64
+bool use_neon_instructions;
65
+#endif
66
67
/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined. */
68
#ifdef CONFIG_SOFTMMU
69
@@ -XXX,XX +XXX,XX @@ typedef enum {
70
/* Otherwise the assembler uses mov r0,r0 */
71
INSN_NOP_v4 = (COND_AL << 28) | ARITH_MOV,
72
73
+ INSN_VADD = 0xf2000800,
74
+ INSN_VAND = 0xf2000110,
75
+ INSN_VEOR = 0xf3000110,
76
INSN_VORR = 0xf2200110,
77
+ INSN_VSUB = 0xf3000800,
78
+
79
+ INSN_VMVN = 0xf3b00580,
80
+
81
+ INSN_VCEQ0 = 0xf3b10100,
82
+ INSN_VCGT0 = 0xf3b10000,
83
+ INSN_VCGE0 = 0xf3b10080,
84
+ INSN_VCLE0 = 0xf3b10180,
85
+ INSN_VCLT0 = 0xf3b10200,
86
+
87
+ INSN_VCEQ = 0xf3000810,
88
+ INSN_VCGE = 0xf2000310,
89
+ INSN_VCGT = 0xf2000300,
90
+ INSN_VCGE_U = 0xf3000310,
91
+ INSN_VCGT_U = 0xf3000300,
92
+
93
+ INSN_VTST = 0xf2000810,
94
95
INSN_VDUP_G = 0xee800b10, /* VDUP (ARM core register) */
96
INSN_VDUP_S = 0xf3b00c00, /* VDUP (scalar) */
97
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
98
#define TCG_CT_CONST_INV 0x200
99
#define TCG_CT_CONST_NEG 0x400
100
#define TCG_CT_CONST_ZERO 0x800
101
+#define TCG_CT_CONST_ORRI 0x1000
102
+#define TCG_CT_CONST_ANDI 0x2000
103
104
#define ALL_GENERAL_REGS 0xffffu
105
#define ALL_VECTOR_REGS 0xffff0000u
106
@@ -XXX,XX +XXX,XX @@ static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
107
return i;
108
}
16
}
109
17
110
+/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */
18
-static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
111
+static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
19
+static PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
112
+{
20
{
113
+ if (v32 == deposit32(v32, 16, 16, v32)) {
21
PageDesc *pd;
114
+ return is_shimm16(v32, cmode, imm8);
22
void **lp;
115
+ } else {
23
@@ -XXX,XX +XXX,XX @@ static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
116
+ return is_shimm32(v32, cmode, imm8);
24
117
+ }
25
static inline PageDesc *page_find(tb_page_addr_t index)
118
+}
26
{
119
+
27
- return page_find_alloc(index, 0);
120
/* Test if a constant matches the constraint.
28
+ return page_find_alloc(index, false);
121
* TODO: define constraints for:
122
*
123
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
124
return 1;
125
} else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
126
return 1;
127
- } else {
128
- return 0;
129
}
130
+
131
+ switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
132
+ case 0:
133
+ break;
134
+ case TCG_CT_CONST_ANDI:
135
+ val = ~val;
136
+ /* fallthru */
137
+ case TCG_CT_CONST_ORRI:
138
+ if (val == deposit64(val, 32, 32, val)) {
139
+ int cmode, imm8;
140
+ return is_shimm1632(val, &cmode, &imm8);
141
+ }
142
+ break;
143
+ default:
144
+ /* Both bits should not be set for the same insn. */
145
+ g_assert_not_reached();
146
+ }
147
+
148
+ return 0;
149
}
29
}
150
30
151
static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
31
static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
152
@@ -XXX,XX +XXX,XX @@ static uint32_t encode_vm(TCGReg rm)
32
- PageDesc **ret_p2, tb_page_addr_t phys2, int alloc);
153
return (extract32(rm, 3, 1) << 5) | (extract32(rm, 0, 3) << 1);
33
+ PageDesc **ret_p2, tb_page_addr_t phys2, bool alloc);
34
35
/* In user-mode page locks aren't used; mmap_lock is enough */
36
#ifdef CONFIG_USER_ONLY
37
@@ -XXX,XX +XXX,XX @@ static inline void page_unlock(PageDesc *pd)
38
/* lock the page(s) of a TB in the correct acquisition order */
39
static inline void page_lock_tb(const TranslationBlock *tb)
40
{
41
- page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], 0);
42
+ page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], false);
154
}
43
}
155
44
156
+static void tcg_out_vreg2(TCGContext *s, ARMInsn insn, int q, int vece,
45
static inline void page_unlock_tb(const TranslationBlock *tb)
157
+ TCGReg d, TCGReg m)
46
@@ -XXX,XX +XXX,XX @@ void page_collection_unlock(struct page_collection *set)
158
+{
47
#endif /* !CONFIG_USER_ONLY */
159
+ tcg_out32(s, insn | (vece << 18) | (q << 6) |
48
160
+ encode_vd(d) | encode_vm(m));
49
static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
161
+}
50
- PageDesc **ret_p2, tb_page_addr_t phys2, int alloc)
162
+
51
+ PageDesc **ret_p2, tb_page_addr_t phys2, bool alloc)
163
static void tcg_out_vreg3(TCGContext *s, ARMInsn insn, int q, int vece,
164
TCGReg d, TCGReg n, TCGReg m)
165
{
52
{
166
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
53
PageDesc *p1, *p2;
167
case INDEX_op_add_vec:
54
tb_page_addr_t page1;
168
case INDEX_op_sub_vec:
55
@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
169
case INDEX_op_xor_vec:
56
* Note that inserting into the hash table first isn't an option, since
170
- case INDEX_op_or_vec:
57
* we can only insert TBs that are fully initialized.
171
- case INDEX_op_and_vec:
58
*/
172
- case INDEX_op_cmp_vec:
59
- page_lock_pair(&p, phys_pc, &p2, phys_page2, 1);
173
return C_O1_I2(w, w, w);
60
+ page_lock_pair(&p, phys_pc, &p2, phys_page2, true);
174
+ case INDEX_op_or_vec:
61
tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
175
+ return C_O1_I2(w, w, wO);
62
if (p2) {
176
+ case INDEX_op_and_vec:
63
tb_page_add(p2, tb, 1, phys_page2);
177
+ return C_O1_I2(w, w, wV);
64
@@ -XXX,XX +XXX,XX @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
178
+ case INDEX_op_cmp_vec:
65
for (addr = start, len = end - start;
179
+ return C_O1_I2(w, w, wZ);
66
len != 0;
180
67
len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
181
default:
68
- PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
182
g_assert_not_reached();
69
+ PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true);
183
@@ -XXX,XX +XXX,XX @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
70
184
}
71
/* If the write protection bit is set, then we invalidate
185
}
72
the code inside. */
186
187
+static const ARMInsn vec_cmp_insn[16] = {
188
+ [TCG_COND_EQ] = INSN_VCEQ,
189
+ [TCG_COND_GT] = INSN_VCGT,
190
+ [TCG_COND_GE] = INSN_VCGE,
191
+ [TCG_COND_GTU] = INSN_VCGT_U,
192
+ [TCG_COND_GEU] = INSN_VCGE_U,
193
+};
194
+
195
+static const ARMInsn vec_cmp0_insn[16] = {
196
+ [TCG_COND_EQ] = INSN_VCEQ0,
197
+ [TCG_COND_GT] = INSN_VCGT0,
198
+ [TCG_COND_GE] = INSN_VCGE0,
199
+ [TCG_COND_LT] = INSN_VCLT0,
200
+ [TCG_COND_LE] = INSN_VCLE0,
201
+};
202
+
203
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
204
unsigned vecl, unsigned vece,
205
const TCGArg *args, const int *const_args)
206
{
207
- g_assert_not_reached();
208
+ TCGType type = vecl + TCG_TYPE_V64;
209
+ unsigned q = vecl;
210
+ TCGArg a0, a1, a2;
211
+ int cmode, imm8;
212
+
213
+ a0 = args[0];
214
+ a1 = args[1];
215
+ a2 = args[2];
216
+
217
+ switch (opc) {
218
+ case INDEX_op_ld_vec:
219
+ tcg_out_ld(s, type, a0, a1, a2);
220
+ return;
221
+ case INDEX_op_st_vec:
222
+ tcg_out_st(s, type, a0, a1, a2);
223
+ return;
224
+ case INDEX_op_dupm_vec:
225
+ tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
226
+ return;
227
+ case INDEX_op_dup2_vec:
228
+ tcg_out_dup2_vec(s, a0, a1, a2);
229
+ return;
230
+ case INDEX_op_add_vec:
231
+ tcg_out_vreg3(s, INSN_VADD, q, vece, a0, a1, a2);
232
+ return;
233
+ case INDEX_op_sub_vec:
234
+ tcg_out_vreg3(s, INSN_VSUB, q, vece, a0, a1, a2);
235
+ return;
236
+ case INDEX_op_xor_vec:
237
+ tcg_out_vreg3(s, INSN_VEOR, q, 0, a0, a1, a2);
238
+ return;
239
+
240
+ case INDEX_op_and_vec:
241
+ if (const_args[2]) {
242
+ is_shimm1632(~a2, &cmode, &imm8);
243
+ if (a0 == a1) {
244
+ tcg_out_vmovi(s, a0, q, 1, cmode | 1, imm8); /* VBICI */
245
+ return;
246
+ }
247
+ tcg_out_vmovi(s, a0, q, 1, cmode, imm8); /* VMVNI */
248
+ a2 = a0;
249
+ }
250
+ tcg_out_vreg3(s, INSN_VAND, q, 0, a0, a1, a2);
251
+ return;
252
+
253
+ case INDEX_op_or_vec:
254
+ if (const_args[2]) {
255
+ is_shimm1632(a2, &cmode, &imm8);
256
+ if (a0 == a1) {
257
+ tcg_out_vmovi(s, a0, q, 0, cmode | 1, imm8); /* VORRI */
258
+ return;
259
+ }
260
+ tcg_out_vmovi(s, a0, q, 0, cmode, imm8); /* VMOVI */
261
+ a2 = a0;
262
+ }
263
+ tcg_out_vreg3(s, INSN_VORR, q, 0, a0, a1, a2);
264
+ return;
265
+
266
+ case INDEX_op_cmp_vec:
267
+ {
268
+ TCGCond cond = args[3];
269
+
270
+ if (cond == TCG_COND_NE) {
271
+ if (const_args[2]) {
272
+ tcg_out_vreg3(s, INSN_VTST, q, vece, a0, a1, a1);
273
+ } else {
274
+ tcg_out_vreg3(s, INSN_VCEQ, q, vece, a0, a1, a2);
275
+ tcg_out_vreg2(s, INSN_VMVN, q, 0, a0, a0);
276
+ }
277
+ } else {
278
+ ARMInsn insn;
279
+
280
+ if (const_args[2]) {
281
+ insn = vec_cmp0_insn[cond];
282
+ if (insn) {
283
+ tcg_out_vreg2(s, insn, q, vece, a0, a1);
284
+ return;
285
+ }
286
+ tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
287
+ a2 = TCG_VEC_TMP;
288
+ }
289
+ insn = vec_cmp_insn[cond];
290
+ if (insn == 0) {
291
+ TCGArg t;
292
+ t = a1, a1 = a2, a2 = t;
293
+ cond = tcg_swap_cond(cond);
294
+ insn = vec_cmp_insn[cond];
295
+ tcg_debug_assert(insn != 0);
296
+ }
297
+ tcg_out_vreg3(s, insn, q, vece, a0, a1, a2);
298
+ }
299
+ }
300
+ return;
301
+
302
+ case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
303
+ case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
304
+ default:
305
+ g_assert_not_reached();
306
+ }
307
}
308
309
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
310
{
311
- return 0;
312
+ switch (opc) {
313
+ case INDEX_op_add_vec:
314
+ case INDEX_op_sub_vec:
315
+ case INDEX_op_and_vec:
316
+ case INDEX_op_or_vec:
317
+ case INDEX_op_xor_vec:
318
+ return 1;
319
+ case INDEX_op_cmp_vec:
320
+ return vece < MO_64;
321
+ default:
322
+ return 0;
323
+ }
324
}
325
326
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
327
--
73
--
328
2.25.1
74
2.34.1
329
75
330
76
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
1
Use the pc coming from db->pc_first rather than the TB.
2
3
Use the cached host_addr rather than re-computing for the
4
first page. We still need a separate lookup for the second
5
page because it won't be computed for DisasContextBase until
6
the translator actually performs a read from the page.
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
10
---
4
tcg/arm/tcg-target.h | 2 +-
11
include/exec/plugin-gen.h | 7 ++++---
5
tcg/arm/tcg-target.c.inc | 6 ++++++
12
accel/tcg/plugin-gen.c | 22 +++++++++++-----------
6
2 files changed, 7 insertions(+), 1 deletion(-)
13
accel/tcg/translator.c | 2 +-
14
3 files changed, 16 insertions(+), 15 deletions(-)
7
15
8
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
16
diff --git a/include/exec/plugin-gen.h b/include/exec/plugin-gen.h
9
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/arm/tcg-target.h
18
--- a/include/exec/plugin-gen.h
11
+++ b/tcg/arm/tcg-target.h
19
+++ b/include/exec/plugin-gen.h
12
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
20
@@ -XXX,XX +XXX,XX @@ struct DisasContextBase;
13
#define TCG_TARGET_HAS_shi_vec 1
21
14
#define TCG_TARGET_HAS_shs_vec 0
22
#ifdef CONFIG_PLUGIN
15
#define TCG_TARGET_HAS_shv_vec 0
23
16
-#define TCG_TARGET_HAS_mul_vec 0
24
-bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress);
17
+#define TCG_TARGET_HAS_mul_vec 1
25
+bool plugin_gen_tb_start(CPUState *cpu, const struct DisasContextBase *db,
18
#define TCG_TARGET_HAS_sat_vec 0
26
+ bool supress);
19
#define TCG_TARGET_HAS_minmax_vec 0
27
void plugin_gen_tb_end(CPUState *cpu);
20
#define TCG_TARGET_HAS_bitsel_vec 0
28
void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db);
21
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
29
void plugin_gen_insn_end(void);
30
@@ -XXX,XX +XXX,XX @@ static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size)
31
32
#else /* !CONFIG_PLUGIN */
33
34
-static inline
35
-bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress)
36
+static inline bool
37
+plugin_gen_tb_start(CPUState *cpu, const struct DisasContextBase *db, bool sup)
38
{
39
return false;
40
}
41
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
22
index XXXXXXX..XXXXXXX 100644
42
index XXXXXXX..XXXXXXX 100644
23
--- a/tcg/arm/tcg-target.c.inc
43
--- a/accel/tcg/plugin-gen.c
24
+++ b/tcg/arm/tcg-target.c.inc
44
+++ b/accel/tcg/plugin-gen.c
25
@@ -XXX,XX +XXX,XX @@ typedef enum {
45
@@ -XXX,XX +XXX,XX @@ static void plugin_gen_inject(const struct qemu_plugin_tb *plugin_tb)
26
INSN_VORN = 0xf2300110,
46
pr_ops();
27
INSN_VORR = 0xf2200110,
47
}
28
INSN_VSUB = 0xf3000800,
48
29
+ INSN_VMUL = 0xf2000910,
49
-bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool mem_only)
30
50
+bool plugin_gen_tb_start(CPUState *cpu, const DisasContextBase *db,
31
INSN_VABS = 0xf3b10300,
51
+ bool mem_only)
32
INSN_VMVN = 0xf3b00580,
52
{
33
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
53
bool ret = false;
34
return C_O1_I1(w, w);
54
35
case INDEX_op_dup2_vec:
55
@@ -XXX,XX +XXX,XX @@ bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool mem_onl
36
case INDEX_op_add_vec:
56
37
+ case INDEX_op_mul_vec:
57
ret = true;
38
case INDEX_op_sub_vec:
58
39
case INDEX_op_xor_vec:
59
- ptb->vaddr = tb->pc;
40
return C_O1_I2(w, w, w);
60
+ ptb->vaddr = db->pc_first;
41
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
61
ptb->vaddr2 = -1;
42
case INDEX_op_add_vec:
62
- get_page_addr_code_hostp(cpu->env_ptr, tb->pc, &ptb->haddr1);
43
tcg_out_vreg3(s, INSN_VADD, q, vece, a0, a1, a2);
63
+ ptb->haddr1 = db->host_addr[0];
44
return;
64
ptb->haddr2 = NULL;
45
+ case INDEX_op_mul_vec:
65
ptb->mem_only = mem_only;
46
+ tcg_out_vreg3(s, INSN_VMUL, q, vece, a0, a1, a2);
66
47
+ return;
67
@@ -XXX,XX +XXX,XX @@ void plugin_gen_insn_start(CPUState *cpu, const DisasContextBase *db)
48
case INDEX_op_sub_vec:
68
* Note that we skip this when haddr1 == NULL, e.g. when we're
49
tcg_out_vreg3(s, INSN_VSUB, q, vece, a0, a1, a2);
69
* fetching instructions from a region not backed by RAM.
50
return;
70
*/
51
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
71
- if (likely(ptb->haddr1 != NULL && ptb->vaddr2 == -1) &&
52
return 1;
72
- unlikely((db->pc_next & TARGET_PAGE_MASK) !=
53
case INDEX_op_abs_vec:
73
- (db->pc_first & TARGET_PAGE_MASK))) {
54
case INDEX_op_cmp_vec:
74
- get_page_addr_code_hostp(cpu->env_ptr, db->pc_next,
55
+ case INDEX_op_mul_vec:
75
- &ptb->haddr2);
56
case INDEX_op_neg_vec:
76
- ptb->vaddr2 = db->pc_next;
57
return vece < MO_64;
77
- }
58
default:
78
- if (likely(ptb->vaddr2 == -1)) {
79
+ if (ptb->haddr1 == NULL) {
80
+ pinsn->haddr = NULL;
81
+ } else if (is_same_page(db, db->pc_next)) {
82
pinsn->haddr = ptb->haddr1 + pinsn->vaddr - ptb->vaddr;
83
} else {
84
+ if (ptb->vaddr2 == -1) {
85
+ ptb->vaddr2 = TARGET_PAGE_ALIGN(db->pc_first);
86
+ get_page_addr_code_hostp(cpu->env_ptr, ptb->vaddr2, &ptb->haddr2);
87
+ }
88
pinsn->haddr = ptb->haddr2 + pinsn->vaddr - ptb->vaddr2;
89
}
90
}
91
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
92
index XXXXXXX..XXXXXXX 100644
93
--- a/accel/tcg/translator.c
94
+++ b/accel/tcg/translator.c
95
@@ -XXX,XX +XXX,XX @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
96
ops->tb_start(db, cpu);
97
tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */
98
99
- plugin_enabled = plugin_gen_tb_start(cpu, tb, cflags & CF_MEMI_ONLY);
100
+ plugin_enabled = plugin_gen_tb_start(cpu, db, cflags & CF_MEMI_ONLY);
101
102
while (true) {
103
db->num_insns++;
59
--
104
--
60
2.25.1
105
2.34.1
61
106
62
107
1
The three vector shift by vector operations are all implemented via
1
Let tb->page_addr[0] contain the address of the first byte of the
2
expansion. Therefore do not actually set TCG_TARGET_HAS_shv_vec,
2
translated block, rather than the address of the page containing the
3
as none of shlv_vec, shrv_vec, sarv_vec may actually appear in the
3
start of the translated block. We need to recover this value anyway
4
instruction stream, and therefore also do not appear in tcg_target_op_def.
4
at various points, and it is easier to discard a page offset when it
5
is not needed, which happens naturally via the existing find_page shift.
5
6
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
---
9
tcg/arm/tcg-target.opc.h | 3 ++
10
accel/tcg/cpu-exec.c | 16 ++++++++--------
10
tcg/arm/tcg-target.c.inc | 61 +++++++++++++++++++++++++++++++++++++++-
11
accel/tcg/cputlb.c | 3 ++-
11
2 files changed, 63 insertions(+), 1 deletion(-)
12
accel/tcg/translate-all.c | 9 +++++----
13
3 files changed, 15 insertions(+), 13 deletions(-)
12
14
13
diff --git a/tcg/arm/tcg-target.opc.h b/tcg/arm/tcg-target.opc.h
15
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
14
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/arm/tcg-target.opc.h
17
--- a/accel/tcg/cpu-exec.c
16
+++ b/tcg/arm/tcg-target.opc.h
18
+++ b/accel/tcg/cpu-exec.c
17
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ struct tb_desc {
18
* emitted by tcg_expand_vec_op. For those familiar with GCC internals,
20
target_ulong pc;
19
* consider these to be UNSPEC with names.
21
target_ulong cs_base;
20
*/
22
CPUArchState *env;
21
+
23
- tb_page_addr_t phys_page1;
22
+DEF(arm_sshl_vec, 1, 2, 0, IMPLVEC)
24
+ tb_page_addr_t page_addr0;
23
+DEF(arm_ushl_vec, 1, 2, 0, IMPLVEC)
25
uint32_t flags;
24
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
26
uint32_t cflags;
27
uint32_t trace_vcpu_dstate;
28
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
29
const struct tb_desc *desc = d;
30
31
if (tb->pc == desc->pc &&
32
- tb->page_addr[0] == desc->phys_page1 &&
33
+ tb->page_addr[0] == desc->page_addr0 &&
34
tb->cs_base == desc->cs_base &&
35
tb->flags == desc->flags &&
36
tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
37
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
38
if (tb->page_addr[1] == -1) {
39
return true;
40
} else {
41
- tb_page_addr_t phys_page2;
42
- target_ulong virt_page2;
43
+ tb_page_addr_t phys_page1;
44
+ target_ulong virt_page1;
45
46
/*
47
* We know that the first page matched, and an otherwise valid TB
48
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
49
* is different for the new TB. Therefore any exception raised
50
* here by the faulting lookup is not premature.
51
*/
52
- virt_page2 = TARGET_PAGE_ALIGN(desc->pc);
53
- phys_page2 = get_page_addr_code(desc->env, virt_page2);
54
- if (tb->page_addr[1] == phys_page2) {
55
+ virt_page1 = TARGET_PAGE_ALIGN(desc->pc);
56
+ phys_page1 = get_page_addr_code(desc->env, virt_page1);
57
+ if (tb->page_addr[1] == phys_page1) {
58
return true;
59
}
60
}
61
@@ -XXX,XX +XXX,XX @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
62
if (phys_pc == -1) {
63
return NULL;
64
}
65
- desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
66
+ desc.page_addr0 = phys_pc;
67
h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
68
return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
69
}
70
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
25
index XXXXXXX..XXXXXXX 100644
71
index XXXXXXX..XXXXXXX 100644
26
--- a/tcg/arm/tcg-target.c.inc
72
--- a/accel/tcg/cputlb.c
27
+++ b/tcg/arm/tcg-target.c.inc
73
+++ b/accel/tcg/cputlb.c
28
@@ -XXX,XX +XXX,XX @@ typedef enum {
74
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
29
INSN_VSHLI = 0xf2800510, /* VSHL (immediate) */
75
can be detected */
30
INSN_VSARI = 0xf2800010, /* VSHR.S */
76
void tlb_protect_code(ram_addr_t ram_addr)
31
INSN_VSHRI = 0xf3800010, /* VSHR.U */
32
+ INSN_VSHL_S = 0xf2000400, /* VSHL.S (register) */
33
+ INSN_VSHL_U = 0xf3000400, /* VSHL.U (register) */
34
35
INSN_VBSL = 0xf3100110,
36
INSN_VBIT = 0xf3200110,
37
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
38
case INDEX_op_usadd_vec:
39
case INDEX_op_ussub_vec:
40
case INDEX_op_xor_vec:
41
+ case INDEX_op_arm_sshl_vec:
42
+ case INDEX_op_arm_ushl_vec:
43
return C_O1_I2(w, w, w);
44
case INDEX_op_or_vec:
45
case INDEX_op_andc_vec:
46
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
47
case INDEX_op_xor_vec:
48
tcg_out_vreg3(s, INSN_VEOR, q, 0, a0, a1, a2);
49
return;
50
+ case INDEX_op_arm_sshl_vec:
51
+ /*
52
+ * Note that Vm is the data and Vn is the shift count,
53
+ * therefore the arguments appear reversed.
54
+ */
55
+ tcg_out_vreg3(s, INSN_VSHL_S, q, vece, a0, a2, a1);
56
+ return;
57
+ case INDEX_op_arm_ushl_vec:
58
+ /* See above. */
59
+ tcg_out_vreg3(s, INSN_VSHL_U, q, vece, a0, a2, a1);
60
+ return;
61
case INDEX_op_shli_vec:
62
tcg_out_vshifti(s, INSN_VSHLI, q, a0, a1, a2 + (8 << vece));
63
return;
64
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
65
case INDEX_op_umax_vec:
66
case INDEX_op_umin_vec:
67
return vece < MO_64;
68
+ case INDEX_op_shlv_vec:
69
+ case INDEX_op_shrv_vec:
70
+ case INDEX_op_sarv_vec:
71
+ return -1;
72
default:
73
return 0;
74
}
75
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
76
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
77
TCGArg a0, ...)
78
{
77
{
79
- g_assert_not_reached();
78
- cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
80
+ va_list va;
79
+ cpu_physical_memory_test_and_clear_dirty(ram_addr & TARGET_PAGE_MASK,
81
+ TCGv_vec v0, v1, v2, t1;
80
+ TARGET_PAGE_SIZE,
82
+ TCGArg a2;
81
DIRTY_MEMORY_CODE);
83
+
84
+ va_start(va, a0);
85
+ v0 = temp_tcgv_vec(arg_temp(a0));
86
+ v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
87
+ a2 = va_arg(va, TCGArg);
88
+ va_end(va);
89
+
90
+ switch (opc) {
91
+ case INDEX_op_shlv_vec:
92
+ /*
93
+ * Merely propagate shlv_vec to arm_ushl_vec.
94
+ * In this way we don't set TCG_TARGET_HAS_shv_vec
95
+ * because everything is done via expansion.
96
+ */
97
+ v2 = temp_tcgv_vec(arg_temp(a2));
98
+ vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(v0),
99
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
100
+ break;
101
+
102
+ case INDEX_op_shrv_vec:
103
+ case INDEX_op_sarv_vec:
104
+ /* Right shifts are negative left shifts for NEON. */
105
+ v2 = temp_tcgv_vec(arg_temp(a2));
106
+ t1 = tcg_temp_new_vec(type);
107
+ tcg_gen_neg_vec(vece, t1, v2);
108
+ if (opc == INDEX_op_shrv_vec) {
109
+ opc = INDEX_op_arm_ushl_vec;
110
+ } else {
111
+ opc = INDEX_op_arm_sshl_vec;
112
+ }
113
+ vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
114
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
115
+ tcg_temp_free_vec(t1);
116
+ break;
117
+
118
+ default:
119
+ g_assert_not_reached();
120
+ }
121
}
82
}
122
83
123
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
84
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
85
index XXXXXXX..XXXXXXX 100644
86
--- a/accel/tcg/translate-all.c
87
+++ b/accel/tcg/translate-all.c
88
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
89
qemu_spin_unlock(&tb->jmp_lock);
90
91
/* remove the TB from the hash list */
92
- phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
93
+ phys_pc = tb->page_addr[0];
94
h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
95
tb->trace_vcpu_dstate);
96
if (!qht_remove(&tb_ctx.htable, tb, h)) {
97
@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
98
* we can only insert TBs that are fully initialized.
99
*/
100
page_lock_pair(&p, phys_pc, &p2, phys_page2, true);
101
- tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
102
+ tb_page_add(p, tb, 0, phys_pc);
103
if (p2) {
104
tb_page_add(p2, tb, 1, phys_page2);
105
} else {
106
@@ -XXX,XX +XXX,XX @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
107
if (n == 0) {
108
/* NOTE: tb_end may be after the end of the page, but
109
it is not a problem */
110
- tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
111
+ tb_start = tb->page_addr[0];
112
tb_end = tb_start + tb->size;
113
} else {
114
tb_start = tb->page_addr[1];
115
- tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
116
+ tb_end = tb_start + ((tb->page_addr[0] + tb->size)
117
+ & ~TARGET_PAGE_MASK);
118
}
119
if (!(tb_end <= start || tb_start >= end)) {
120
#ifdef TARGET_HAS_PRECISE_SMC
124
--
121
--
125
2.25.1
122
2.34.1
126
123
127
124
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
1
This function has two users, who use it incompatibly.
2
In tlb_flush_page_by_mmuidx_async_0, when flushing a
3
single page, we need to flush exactly two pages.
4
In tlb_flush_range_by_mmuidx_async_0, when flushing a
5
range of pages, we need to flush N+1 pages.
6
7
This avoids double-flushing of jmp cache pages in a range.
8
9
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
11
---
4
tcg/arm/tcg-target.c.inc | 70 ++++++++++++++++++++++++++++++++++++----
12
accel/tcg/cputlb.c | 25 ++++++++++++++-----------
5
1 file changed, 64 insertions(+), 6 deletions(-)
13
1 file changed, 14 insertions(+), 11 deletions(-)
6
14
7
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
15
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
8
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/arm/tcg-target.c.inc
17
--- a/accel/tcg/cputlb.c
10
+++ b/tcg/arm/tcg-target.c.inc
18
+++ b/accel/tcg/cputlb.c
11
@@ -XXX,XX +XXX,XX @@ typedef enum {
19
@@ -XXX,XX +XXX,XX @@ static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
12
INSN_NOP_v6k = 0xe320f000,
13
/* Otherwise the assembler uses mov r0,r0 */
14
INSN_NOP_v4 = (COND_AL << 28) | ARITH_MOV,
15
+
16
+ INSN_VLD1 = 0xf4200000, /* VLD1 (multiple single elements) */
17
+ INSN_VST1 = 0xf4000000, /* VST1 (multiple single elements) */
18
} ARMInsn;
19
20
#define INSN_NOP (use_armv7_instructions ? INSN_NOP_v6k : INSN_NOP_v4)
21
@@ -XXX,XX +XXX,XX @@ static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args,
22
}
20
}
23
}
21
}
24
22
25
+/*
23
-static void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
26
+ * Note that TCGReg references Q-registers.
24
-{
27
+ * Q-regno = 2 * D-regno, so shift left by 1 whlie inserting.
25
- /* Discard jump cache entries for any tb which might potentially
28
+ */
26
- overlap the flushed page. */
29
+static uint32_t encode_vd(TCGReg rd)
27
- tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
30
+{
28
- tb_jmp_cache_clear_page(cpu, addr);
31
+ tcg_debug_assert(rd >= TCG_REG_Q0);
29
-}
32
+ return (extract32(rd, 3, 1) << 22) | (extract32(rd, 0, 3) << 13);
30
-
33
+}
31
/**
34
+
32
* tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
35
+static void tcg_out_vldst(TCGContext *s, ARMInsn insn,
33
* @desc: The CPUTLBDesc portion of the TLB
36
+ TCGReg rd, TCGReg rn, int offset)
34
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
37
+{
35
}
38
+ if (offset != 0) {
36
qemu_spin_unlock(&env_tlb(env)->c.lock);
39
+ if (check_fit_imm(offset) || check_fit_imm(-offset)) {
37
40
+ tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
38
- tb_flush_jmp_cache(cpu, addr);
41
+ TCG_REG_TMP, rn, offset, true);
39
+ /*
42
+ } else {
40
+ * Discard jump cache entries for any tb which might potentially
43
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, offset);
41
+ * overlap the flushed page, which includes the previous.
44
+ tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
42
+ */
45
+ TCG_REG_TMP, TCG_REG_TMP, rn, 0);
43
+ tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
46
+ }
44
+ tb_jmp_cache_clear_page(cpu, addr);
47
+ rn = TCG_REG_TMP;
48
+ }
49
+ tcg_out32(s, insn | (rn << 16) | encode_vd(rd) | 0xf);
50
+}
51
+
52
#ifdef CONFIG_SOFTMMU
53
#include "../tcg-ldst.c.inc"
54
55
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
56
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
57
}
45
}
58
46
59
-static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
47
/**
60
- TCGReg arg1, intptr_t arg2)
48
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
61
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
49
return;
62
+ TCGReg arg1, intptr_t arg2)
50
}
63
{
51
64
- tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
52
- for (target_ulong i = 0; i < d.len; i += TARGET_PAGE_SIZE) {
65
+ switch (type) {
53
- tb_flush_jmp_cache(cpu, d.addr + i);
66
+ case TCG_TYPE_I32:
54
+ /*
67
+ tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
55
+ * Discard jump cache entries for any tb which might potentially
68
+ return;
56
+ * overlap the flushed pages, which includes the previous.
69
+ case TCG_TYPE_V64:
57
+ */
70
+ /* regs 1; size 8; align 8 */
58
+ d.addr -= TARGET_PAGE_SIZE;
71
+ tcg_out_vldst(s, INSN_VLD1 | 0x7d0, arg, arg1, arg2);
59
+ for (target_ulong i = 0, n = d.len / TARGET_PAGE_SIZE + 1; i < n; i++) {
72
+ return;
60
+ tb_jmp_cache_clear_page(cpu, d.addr);
73
+ case TCG_TYPE_V128:
61
+ d.addr += TARGET_PAGE_SIZE;
74
+ /* regs 2; size 8; align 16 */
62
}
75
+ tcg_out_vldst(s, INSN_VLD1 | 0xae0, arg, arg1, arg2);
76
+ return;
77
+ default:
78
+ g_assert_not_reached();
79
+ }
80
}
63
}
81
64
82
-static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
83
- TCGReg arg1, intptr_t arg2)
84
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
85
+ TCGReg arg1, intptr_t arg2)
86
{
87
- tcg_out_st32(s, COND_AL, arg, arg1, arg2);
88
+ switch (type) {
89
+ case TCG_TYPE_I32:
90
+ tcg_out_st32(s, COND_AL, arg, arg1, arg2);
91
+ return;
92
+ case TCG_TYPE_V64:
93
+ /* regs 1; size 8; align 8 */
94
+ tcg_out_vldst(s, INSN_VST1 | 0x7d0, arg, arg1, arg2);
95
+ return;
96
+ case TCG_TYPE_V128:
97
+ /* regs 2; size 8; align 16 */
98
+ tcg_out_vldst(s, INSN_VST1 | 0xae0, arg, arg1, arg2);
99
+ return;
100
+ default:
101
+ g_assert_not_reached();
102
+ }
103
}
104
105
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
106
--
65
--
107
2.25.1
66
2.34.1
108
67
109
68
1
Add registers and function stubs. The functionality
1
Wrap the bare TranslationBlock pointer into a structure.
2
is disabled via use_neon_instructions defined to 0.
3
2
4
We must still include results for the mandatory opcodes in
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
tcg_target_op_def, as all opcodes are checked during tcg init.
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
6
---
10
tcg/arm/tcg-target-con-set.h | 4 ++
7
accel/tcg/tb-hash.h | 1 +
11
tcg/arm/tcg-target-con-str.h | 1 +
8
accel/tcg/tb-jmp-cache.h | 24 ++++++++++++++++++++++++
12
tcg/arm/tcg-target.h | 48 ++++++++++++--
9
include/exec/cpu-common.h | 1 +
13
tcg/arm/tcg-target.opc.h | 12 ++++
10
include/hw/core/cpu.h | 15 +--------------
14
tcg/arm/tcg-target.c.inc | 117 +++++++++++++++++++++++++++++------
11
include/qemu/typedefs.h | 1 +
15
5 files changed, 158 insertions(+), 24 deletions(-)
12
accel/stubs/tcg-stub.c | 4 ++++
16
create mode 100644 tcg/arm/tcg-target.opc.h
13
accel/tcg/cpu-exec.c | 10 +++++++---
14
accel/tcg/cputlb.c | 9 +++++----
15
accel/tcg/translate-all.c | 28 +++++++++++++++++++++++++---
16
hw/core/cpu-common.c | 3 +--
17
plugins/core.c | 2 +-
18
trace/control-target.c | 2 +-
19
12 files changed, 72 insertions(+), 28 deletions(-)
20
create mode 100644 accel/tcg/tb-jmp-cache.h
17
21
18
diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h
22
diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h
19
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
20
--- a/tcg/arm/tcg-target-con-set.h
24
--- a/accel/tcg/tb-hash.h
21
+++ b/tcg/arm/tcg-target-con-set.h
25
+++ b/accel/tcg/tb-hash.h
22
@@ -XXX,XX +XXX,XX @@ C_O0_I1(r)
23
C_O0_I2(r, r)
24
C_O0_I2(r, rIN)
25
C_O0_I2(s, s)
26
+C_O0_I2(w, r)
27
C_O0_I3(s, s, s)
28
C_O0_I4(r, r, rI, rI)
29
C_O0_I4(s, s, s, s)
30
C_O1_I1(r, l)
31
C_O1_I1(r, r)
32
+C_O1_I1(w, r)
33
+C_O1_I1(w, wr)
34
C_O1_I2(r, 0, rZ)
35
C_O1_I2(r, l, l)
36
C_O1_I2(r, r, r)
37
@@ -XXX,XX +XXX,XX @@ C_O1_I2(r, r, rIK)
38
C_O1_I2(r, r, rIN)
39
C_O1_I2(r, r, ri)
40
C_O1_I2(r, rZ, rZ)
41
+C_O1_I2(w, w, w)
42
C_O1_I4(r, r, r, rI, rI)
43
C_O1_I4(r, r, rIN, rIK, 0)
44
C_O2_I1(r, r, l)
45
diff --git a/tcg/arm/tcg-target-con-str.h b/tcg/arm/tcg-target-con-str.h
46
index XXXXXXX..XXXXXXX 100644
47
--- a/tcg/arm/tcg-target-con-str.h
48
+++ b/tcg/arm/tcg-target-con-str.h
49
@@ -XXX,XX +XXX,XX @@
26
@@ -XXX,XX +XXX,XX @@
50
REGS('r', ALL_GENERAL_REGS)
27
#include "exec/cpu-defs.h"
51
REGS('l', ALL_QLOAD_REGS)
28
#include "exec/exec-all.h"
52
REGS('s', ALL_QSTORE_REGS)
29
#include "qemu/xxhash.h"
53
+REGS('w', ALL_VECTOR_REGS)
30
+#include "tb-jmp-cache.h"
54
31
55
/*
32
#ifdef CONFIG_SOFTMMU
56
* Define constraint letters for constants:
33
57
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
34
diff --git a/accel/tcg/tb-jmp-cache.h b/accel/tcg/tb-jmp-cache.h
58
index XXXXXXX..XXXXXXX 100644
59
--- a/tcg/arm/tcg-target.h
60
+++ b/tcg/arm/tcg-target.h
61
@@ -XXX,XX +XXX,XX @@ typedef enum {
62
TCG_REG_R13,
63
TCG_REG_R14,
64
TCG_REG_PC,
65
+
66
+ TCG_REG_Q0,
67
+ TCG_REG_Q1,
68
+ TCG_REG_Q2,
69
+ TCG_REG_Q3,
70
+ TCG_REG_Q4,
71
+ TCG_REG_Q5,
72
+ TCG_REG_Q6,
73
+ TCG_REG_Q7,
74
+ TCG_REG_Q8,
75
+ TCG_REG_Q9,
76
+ TCG_REG_Q10,
77
+ TCG_REG_Q11,
78
+ TCG_REG_Q12,
79
+ TCG_REG_Q13,
80
+ TCG_REG_Q14,
81
+ TCG_REG_Q15,
82
+
83
+ TCG_AREG0 = TCG_REG_R6,
84
+ TCG_REG_CALL_STACK = TCG_REG_R13,
85
} TCGReg;
86
87
-#define TCG_TARGET_NB_REGS 16
88
+#define TCG_TARGET_NB_REGS 32
89
90
#ifdef __ARM_ARCH_EXT_IDIV__
91
#define use_idiv_instructions 1
92
#else
93
extern bool use_idiv_instructions;
94
#endif
95
-
96
+#define use_neon_instructions 0
97
98
/* used for function call generation */
99
-#define TCG_REG_CALL_STACK        TCG_REG_R13
100
#define TCG_TARGET_STACK_ALIGN        8
101
#define TCG_TARGET_CALL_ALIGN_ARGS    1
102
#define TCG_TARGET_CALL_STACK_OFFSET    0
103
@@ -XXX,XX +XXX,XX @@ extern bool use_idiv_instructions;
104
#define TCG_TARGET_HAS_direct_jump 0
105
#define TCG_TARGET_HAS_qemu_st8_i32 0
106
107
-enum {
108
- TCG_AREG0 = TCG_REG_R6,
109
-};
110
+#define TCG_TARGET_HAS_v64 use_neon_instructions
111
+#define TCG_TARGET_HAS_v128 use_neon_instructions
112
+#define TCG_TARGET_HAS_v256 0
113
+
114
+#define TCG_TARGET_HAS_andc_vec 0
115
+#define TCG_TARGET_HAS_orc_vec 0
116
+#define TCG_TARGET_HAS_not_vec 0
117
+#define TCG_TARGET_HAS_neg_vec 0
118
+#define TCG_TARGET_HAS_abs_vec 0
119
+#define TCG_TARGET_HAS_roti_vec 0
120
+#define TCG_TARGET_HAS_rots_vec 0
121
+#define TCG_TARGET_HAS_rotv_vec 0
122
+#define TCG_TARGET_HAS_shi_vec 0
123
+#define TCG_TARGET_HAS_shs_vec 0
124
+#define TCG_TARGET_HAS_shv_vec 0
125
+#define TCG_TARGET_HAS_mul_vec 0
126
+#define TCG_TARGET_HAS_sat_vec 0
127
+#define TCG_TARGET_HAS_minmax_vec 0
128
+#define TCG_TARGET_HAS_bitsel_vec 0
129
+#define TCG_TARGET_HAS_cmpsel_vec 0
130
131
#define TCG_TARGET_DEFAULT_MO (0)
132
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
133
diff --git a/tcg/arm/tcg-target.opc.h b/tcg/arm/tcg-target.opc.h
134
new file mode 100644
35
new file mode 100644
135
index XXXXXXX..XXXXXXX
36
index XXXXXXX..XXXXXXX
136
--- /dev/null
37
--- /dev/null
137
+++ b/tcg/arm/tcg-target.opc.h
38
+++ b/accel/tcg/tb-jmp-cache.h
138
@@ -XXX,XX +XXX,XX @@
39
@@ -XXX,XX +XXX,XX @@
139
+/*
40
+/*
140
+ * Copyright (c) 2019 Linaro
41
+ * The per-CPU TranslationBlock jump cache.
141
+ *
42
+ *
142
+ * This work is licensed under the terms of the GNU GPL, version 2 or
43
+ * Copyright (c) 2003 Fabrice Bellard
143
+ * (at your option) any later version.
144
+ *
44
+ *
145
+ * See the COPYING file in the top-level directory for details.
45
+ * SPDX-License-Identifier: GPL-2.0-or-later
146
+ *
147
+ * Target-specific opcodes for host vector expansion. These will be
148
+ * emitted by tcg_expand_vec_op. For those familiar with GCC internals,
149
+ * consider these to be UNSPEC with names.
150
+ */
46
+ */
151
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
47
+
152
index XXXXXXX..XXXXXXX 100644
48
+#ifndef ACCEL_TCG_TB_JMP_CACHE_H
153
--- a/tcg/arm/tcg-target.c.inc
49
+#define ACCEL_TCG_TB_JMP_CACHE_H
154
+++ b/tcg/arm/tcg-target.c.inc
50
+
155
@@ -XXX,XX +XXX,XX @@ bool use_idiv_instructions;
51
+#define TB_JMP_CACHE_BITS 12
156
52
+#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
157
#ifdef CONFIG_DEBUG_TCG
53
+
158
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
54
+/*
159
- "%r0",
55
+ * Accessed in parallel; all accesses to 'tb' must be atomic.
160
- "%r1",
56
+ */
161
- "%r2",
57
+struct CPUJumpCache {
162
- "%r3",
58
+ struct {
163
- "%r4",
59
+ TranslationBlock *tb;
164
- "%r5",
60
+ } array[TB_JMP_CACHE_SIZE];
165
- "%r6",
61
+};
166
- "%r7",
62
+
167
- "%r8",
63
+#endif /* ACCEL_TCG_TB_JMP_CACHE_H */
168
- "%r9",
64
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
169
- "%r10",
65
index XXXXXXX..XXXXXXX 100644
170
- "%r11",
66
--- a/include/exec/cpu-common.h
171
- "%r12",
67
+++ b/include/exec/cpu-common.h
172
- "%r13",
68
@@ -XXX,XX +XXX,XX @@ void cpu_list_unlock(void);
173
- "%r14",
69
unsigned int cpu_list_generation_id_get(void);
174
- "%pc",
70
175
+ "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
71
void tcg_flush_softmmu_tlb(CPUState *cs);
176
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%sp", "%r14", "%pc",
72
+void tcg_flush_jmp_cache(CPUState *cs);
177
+ "%q0", "%q1", "%q2", "%q3", "%q4", "%q5", "%q6", "%q7",
73
178
+ "%q8", "%q9", "%q10", "%q11", "%q12", "%q13", "%q14", "%q15",
74
void tcg_iommu_init_notifier_list(CPUState *cpu);
179
};
75
void tcg_iommu_free_notifier_list(CPUState *cpu);
180
#endif
76
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
181
77
index XXXXXXX..XXXXXXX 100644
182
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
78
--- a/include/hw/core/cpu.h
183
TCG_REG_R3,
79
+++ b/include/hw/core/cpu.h
184
TCG_REG_R12,
80
@@ -XXX,XX +XXX,XX @@ struct kvm_run;
185
TCG_REG_R14,
81
struct hax_vcpu_state;
186
+
82
struct hvf_vcpu_state;
187
+ TCG_REG_Q0,
83
188
+ TCG_REG_Q1,
84
-#define TB_JMP_CACHE_BITS 12
189
+ TCG_REG_Q2,
85
-#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
190
+ TCG_REG_Q3,
86
-
191
+ /* Q4 - Q7 are call-saved, and skipped. */
87
/* work queue */
192
+ TCG_REG_Q8,
88
193
+ TCG_REG_Q9,
89
/* The union type allows passing of 64 bit target pointers on 32 bit
194
+ TCG_REG_Q10,
90
@@ -XXX,XX +XXX,XX @@ struct CPUState {
195
+ TCG_REG_Q11,
91
CPUArchState *env_ptr;
196
+ TCG_REG_Q12,
92
IcountDecr *icount_decr_ptr;
197
+ TCG_REG_Q13,
93
198
+ TCG_REG_Q14,
94
- /* Accessed in parallel; all accesses must be atomic */
199
+ TCG_REG_Q15,
95
- TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
200
};
96
+ CPUJumpCache *tb_jmp_cache;
201
97
202
static const int tcg_target_call_iarg_regs[4] = {
98
struct GDBRegisterState *gdb_regs;
203
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_call_oarg_regs[2] = {
99
int gdb_num_regs;
204
};
100
@@ -XXX,XX +XXX,XX @@ extern CPUTailQ cpus;
205
101
206
#define TCG_REG_TMP TCG_REG_R12
102
extern __thread CPUState *current_cpu;
207
+#define TCG_VEC_TMP TCG_REG_Q15
103
208
104
-static inline void cpu_tb_jmp_cache_clear(CPUState *cpu)
209
enum arm_cond_code_e {
105
-{
210
COND_EQ = 0x0,
106
- unsigned int i;
211
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
107
-
212
#define TCG_CT_CONST_ZERO 0x800
108
- for (i = 0; i < TB_JMP_CACHE_SIZE; i++) {
213
109
- qatomic_set(&cpu->tb_jmp_cache[i], NULL);
214
#define ALL_GENERAL_REGS 0xffffu
110
- }
215
+#define ALL_VECTOR_REGS 0xffff0000u
111
-}
216
112
-
217
/*
113
/**
218
* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
114
* qemu_tcg_mttcg_enabled:
219
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
115
* Check whether we are running MultiThread TCG or not.
220
case INDEX_op_qemu_st_i64:
116
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
221
return TARGET_LONG_BITS == 32 ? C_O0_I3(s, s, s) : C_O0_I4(s, s, s, s);
117
index XXXXXXX..XXXXXXX 100644
222
118
--- a/include/qemu/typedefs.h
223
+ case INDEX_op_st_vec:
119
+++ b/include/qemu/typedefs.h
224
+ return C_O0_I2(w, r);
120
@@ -XXX,XX +XXX,XX @@ typedef struct CoMutex CoMutex;
225
+ case INDEX_op_ld_vec:
121
typedef struct ConfidentialGuestSupport ConfidentialGuestSupport;
226
+ case INDEX_op_dupm_vec:
122
typedef struct CPUAddressSpace CPUAddressSpace;
227
+ return C_O1_I1(w, r);
123
typedef struct CPUArchState CPUArchState;
228
+ case INDEX_op_dup_vec:
124
+typedef struct CPUJumpCache CPUJumpCache;
229
+ return C_O1_I1(w, wr);
125
typedef struct CPUState CPUState;
230
+ case INDEX_op_dup2_vec:
126
typedef struct CPUTLBEntryFull CPUTLBEntryFull;
231
+ case INDEX_op_add_vec:
127
typedef struct DeviceListener DeviceListener;
232
+ case INDEX_op_sub_vec:
128
diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c
233
+ case INDEX_op_xor_vec:
129
index XXXXXXX..XXXXXXX 100644
234
+ case INDEX_op_or_vec:
130
--- a/accel/stubs/tcg-stub.c
235
+ case INDEX_op_and_vec:
131
+++ b/accel/stubs/tcg-stub.c
236
+ case INDEX_op_cmp_vec:
132
@@ -XXX,XX +XXX,XX @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
237
+ return C_O1_I2(w, w, w);
133
{
238
+
134
}
239
default:
135
240
g_assert_not_reached();
136
+void tcg_flush_jmp_cache(CPUState *cpu)
241
}
137
+{
242
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
138
+}
243
{
139
+
244
/* Only probe for the platform and capabilities if we havn't already
140
int probe_access_flags(CPUArchState *env, target_ulong addr,
245
determined maximum values at compile time. */
141
MMUAccessType access_type, int mmu_idx,
246
-#ifndef use_idiv_instructions
142
bool nonfault, void **phost, uintptr_t retaddr)
247
+#if !defined(use_idiv_instructions) || !defined(use_neon_instructions)
143
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
248
{
144
index XXXXXXX..XXXXXXX 100644
249
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
145
--- a/accel/tcg/cpu-exec.c
250
+#ifndef use_idiv_instructions
146
+++ b/accel/tcg/cpu-exec.c
251
use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
147
@@ -XXX,XX +XXX,XX @@
252
+#endif
148
#include "sysemu/replay.h"
253
+#ifndef use_neon_instructions
149
#include "sysemu/tcg.h"
254
+ use_neon_instructions = (hwcap & HWCAP_ARM_NEON) != 0;
150
#include "exec/helper-proto.h"
255
+#endif
151
+#include "tb-jmp-cache.h"
256
}
152
#include "tb-hash.h"
257
#endif
153
#include "tb-context.h"
258
+
154
#include "internal.h"
259
if (__ARM_ARCH < 7) {
155
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
260
const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
156
tcg_debug_assert(!(cflags & CF_INVALID));
261
if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
157
262
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
158
hash = tb_jmp_cache_hash_func(pc);
159
- tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);
160
+ tb = qatomic_rcu_read(&cpu->tb_jmp_cache->array[hash].tb);
161
162
if (likely(tb &&
163
tb->pc == pc &&
164
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
165
if (tb == NULL) {
166
return NULL;
167
}
168
- qatomic_set(&cpu->tb_jmp_cache[hash], tb);
169
+ qatomic_set(&cpu->tb_jmp_cache->array[hash].tb, tb);
170
return tb;
171
}
172
173
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
174
175
tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
176
if (tb == NULL) {
177
+ uint32_t h;
178
+
179
mmap_lock();
180
tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
181
mmap_unlock();
182
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
183
* We add the TB in the virtual pc hash table
184
* for the fast lookup
185
*/
186
- qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
187
+ h = tb_jmp_cache_hash_func(pc);
188
+ qatomic_set(&cpu->tb_jmp_cache->array[h].tb, tb);
189
}
190
191
#ifndef CONFIG_USER_ONLY
192
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
193
index XXXXXXX..XXXXXXX 100644
194
--- a/accel/tcg/cputlb.c
195
+++ b/accel/tcg/cputlb.c
196
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
197
198
static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
199
{
200
- unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);
201
+ int i, i0 = tb_jmp_cache_hash_page(page_addr);
202
+ CPUJumpCache *jc = cpu->tb_jmp_cache;
203
204
for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
205
- qatomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
206
+ qatomic_set(&jc->array[i0 + i].tb, NULL);
207
}
208
}
209
210
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
211
212
qemu_spin_unlock(&env_tlb(env)->c.lock);
213
214
- cpu_tb_jmp_cache_clear(cpu);
215
+ tcg_flush_jmp_cache(cpu);
216
217
if (to_clean == ALL_MMUIDX_BITS) {
218
qatomic_set(&env_tlb(env)->c.full_flush_count,
219
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
220
* longer to clear each entry individually than it will to clear it all.
221
*/
222
if (d.len >= (TARGET_PAGE_SIZE * TB_JMP_CACHE_SIZE)) {
223
- cpu_tb_jmp_cache_clear(cpu);
224
+ tcg_flush_jmp_cache(cpu);
225
return;
226
}
227
228
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
229
index XXXXXXX..XXXXXXX 100644
230
--- a/accel/tcg/translate-all.c
231
+++ b/accel/tcg/translate-all.c
232
@@ -XXX,XX +XXX,XX @@
233
#include "sysemu/tcg.h"
234
#include "qapi/error.h"
235
#include "hw/core/tcg-cpu-ops.h"
236
+#include "tb-jmp-cache.h"
237
#include "tb-hash.h"
238
#include "tb-context.h"
239
#include "internal.h"
240
@@ -XXX,XX +XXX,XX @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
241
}
242
243
CPU_FOREACH(cpu) {
244
- cpu_tb_jmp_cache_clear(cpu);
245
+ tcg_flush_jmp_cache(cpu);
246
}
247
248
qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
249
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
250
/* remove the TB from the hash list */
251
h = tb_jmp_cache_hash_func(tb->pc);
252
CPU_FOREACH(cpu) {
253
- if (qatomic_read(&cpu->tb_jmp_cache[h]) == tb) {
254
- qatomic_set(&cpu->tb_jmp_cache[h], NULL);
255
+ CPUJumpCache *jc = cpu->tb_jmp_cache;
256
+ if (qatomic_read(&jc->array[h].tb) == tb) {
257
+ qatomic_set(&jc->array[h].tb, NULL);
263
}
258
}
264
}
259
}
265
260
266
- tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
261
@@ -XXX,XX +XXX,XX @@ int page_unprotect(target_ulong address, uintptr_t pc)
267
+ tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
262
}
268
263
#endif /* CONFIG_USER_ONLY */
269
tcg_target_call_clobber_regs = 0;
264
270
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
265
+/*
271
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
266
+ * Called by generic code at e.g. cpu reset after cpu creation,
272
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
267
+ * therefore we must be prepared to allocate the jump cache.
273
tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
268
+ */
274
269
+void tcg_flush_jmp_cache(CPUState *cpu)
275
+ if (use_neon_instructions) {
270
+{
276
+ tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
271
+ CPUJumpCache *jc = cpu->tb_jmp_cache;
277
+ tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
272
+
278
+
273
+ if (likely(jc)) {
279
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q0);
274
+ for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
280
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q1);
275
+ qatomic_set(&jc->array[i].tb, NULL);
281
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q2);
276
+ }
282
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q3);
277
+ } else {
283
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q8);
278
+ /* This should happen once during realize, and thus never race. */
284
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q9);
279
+ jc = g_new0(CPUJumpCache, 1);
285
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q10);
280
+ jc = qatomic_xchg(&cpu->tb_jmp_cache, jc);
286
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q11);
281
+ assert(jc == NULL);
287
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q12);
288
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q13);
289
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q14);
290
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q15);
291
+ }
282
+ }
292
+
293
s->reserved_regs = 0;
294
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
295
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
296
tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
297
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
298
}
299
300
static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
301
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
302
tcg_out_movi32(s, COND_AL, ret, arg);
303
}
304
305
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
306
+ TCGReg rd, TCGReg rs)
307
+{
308
+ g_assert_not_reached();
309
+}
283
+}
310
+
284
+
311
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
285
/* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
312
+ TCGReg rd, TCGReg base, intptr_t offset)
286
void tcg_flush_softmmu_tlb(CPUState *cs)
313
+{
287
{
314
+ g_assert_not_reached();
288
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
315
+}
289
index XXXXXXX..XXXXXXX 100644
316
+
290
--- a/hw/core/cpu-common.c
317
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
291
+++ b/hw/core/cpu-common.c
318
+ TCGReg rd, int64_t v64)
292
@@ -XXX,XX +XXX,XX @@ static void cpu_common_reset(DeviceState *dev)
319
+{
293
cpu->cflags_next_tb = -1;
320
+ g_assert_not_reached();
294
321
+}
295
if (tcg_enabled()) {
322
+
296
- cpu_tb_jmp_cache_clear(cpu);
323
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
297
-
324
+ unsigned vecl, unsigned vece,
298
+ tcg_flush_jmp_cache(cpu);
325
+ const TCGArg *args, const int *const_args)
299
tcg_flush_softmmu_tlb(cpu);
326
+{
300
}
327
+ g_assert_not_reached();
301
}
328
+}
302
diff --git a/plugins/core.c b/plugins/core.c
329
+
303
index XXXXXXX..XXXXXXX 100644
330
+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
304
--- a/plugins/core.c
331
+{
305
+++ b/plugins/core.c
332
+ return 0;
306
@@ -XXX,XX +XXX,XX @@ struct qemu_plugin_ctx *plugin_id_to_ctx_locked(qemu_plugin_id_t id)
333
+}
307
static void plugin_cpu_update__async(CPUState *cpu, run_on_cpu_data data)
334
+
308
{
335
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
309
bitmap_copy(cpu->plugin_mask, &data.host_ulong, QEMU_PLUGIN_EV_MAX);
336
+ TCGArg a0, ...)
310
- cpu_tb_jmp_cache_clear(cpu);
337
+{
311
+ tcg_flush_jmp_cache(cpu);
338
+ g_assert_not_reached();
312
}
339
+}
313
340
+
314
static void plugin_cpu_update__locked(gpointer k, gpointer v, gpointer udata)
341
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
315
diff --git a/trace/control-target.c b/trace/control-target.c
342
{
316
index XXXXXXX..XXXXXXX 100644
343
int i;
317
--- a/trace/control-target.c
318
+++ b/trace/control-target.c
319
@@ -XXX,XX +XXX,XX @@ static void trace_event_synchronize_vcpu_state_dynamic(
320
{
321
bitmap_copy(vcpu->trace_dstate, vcpu->trace_dstate_delayed,
322
CPU_TRACE_DSTATE_MAX_EVENTS);
323
- cpu_tb_jmp_cache_clear(vcpu);
324
+ tcg_flush_jmp_cache(vcpu);
325
}
326
327
void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
344
--
2.25.1

--
2.34.1

diff view generated by jsdifflib
These logical and arithmetic operations are optional, but are
trivial to accomplish with the existing infrastructure.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/arm/tcg-target-con-set.h | 1 +
tcg/arm/tcg-target.h | 10 +++++-----
tcg/arm/tcg-target.c.inc | 38 ++++++++++++++++++++++++++++++++++++
3 files changed, 44 insertions(+), 5 deletions(-)

Populate this new method for all targets. Always match
the result that would be given by cpu_get_tb_cpu_state,
as we will want these values to correspond in the logs.

Reviewed-by: Taylor Simpson <tsimpson@quicinc.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> (target/sparc)
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
Cc: Eduardo Habkost <eduardo@habkost.net> (supporter:Machine core)
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> (supporter:Machine core)
Cc: "Philippe Mathieu-Daudé" <f4bug@amsat.org> (reviewer:Machine core)
Cc: Yanan Wang <wangyanan55@huawei.com> (reviewer:Machine core)
Cc: Michael Rolnik <mrolnik@gmail.com> (maintainer:AVR TCG CPUs)
Cc: "Edgar E. Iglesias" <edgar.iglesias@gmail.com> (maintainer:CRIS TCG CPUs)
Cc: Taylor Simpson <tsimpson@quicinc.com> (supporter:Hexagon TCG CPUs)
Cc: Song Gao <gaosong@loongson.cn> (maintainer:LoongArch TCG CPUs)
Cc: Xiaojuan Yang <yangxiaojuan@loongson.cn> (maintainer:LoongArch TCG CPUs)
Cc: Laurent Vivier <laurent@vivier.eu> (maintainer:M68K TCG CPUs)
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com> (reviewer:MIPS TCG CPUs)
Cc: Aleksandar Rikalo <aleksandar.rikalo@syrmia.com> (reviewer:MIPS TCG CPUs)
Cc: Chris Wulff <crwulff@gmail.com> (maintainer:NiosII TCG CPUs)
Cc: Marek Vasut <marex@denx.de> (maintainer:NiosII TCG CPUs)
Cc: Stafford Horne <shorne@gmail.com> (odd fixer:OpenRISC TCG CPUs)
Cc: Yoshinori Sato <ysato@users.sourceforge.jp> (reviewer:RENESAS RX CPUs)
Cc: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> (maintainer:SPARC TCG CPUs)
Cc: Bastian Koppelmann <kbastian@mail.uni-paderborn.de> (maintainer:TriCore TCG CPUs)
Cc: Max Filippov <jcmvbkbc@gmail.com> (maintainer:Xtensa TCG CPUs)
Cc: qemu-arm@nongnu.org (open list:ARM TCG CPUs)
Cc: qemu-ppc@nongnu.org (open list:PowerPC TCG CPUs)
Cc: qemu-riscv@nongnu.org (open list:RISC-V TCG CPUs)
Cc: qemu-s390x@nongnu.org (open list:S390 TCG CPUs)
---
include/hw/core/cpu.h | 3 +++
target/alpha/cpu.c | 9 +++++++++
target/arm/cpu.c | 13 +++++++++++++
target/avr/cpu.c | 8 ++++++++
target/cris/cpu.c | 8 ++++++++
target/hexagon/cpu.c | 8 ++++++++
target/hppa/cpu.c | 8 ++++++++
target/i386/cpu.c | 9 +++++++++
target/loongarch/cpu.c | 9 +++++++++
target/m68k/cpu.c | 8 ++++++++
target/microblaze/cpu.c | 8 ++++++++
target/mips/cpu.c | 8 ++++++++
target/nios2/cpu.c | 9 +++++++++
target/openrisc/cpu.c | 8 ++++++++
target/ppc/cpu_init.c | 8 ++++++++
target/riscv/cpu.c | 13 +++++++++++++
target/rx/cpu.c | 8 ++++++++
target/s390x/cpu.c | 8 ++++++++
target/sh4/cpu.c | 8 ++++++++
target/sparc/cpu.c | 8 ++++++++
target/tricore/cpu.c | 9 +++++++++
target/xtensa/cpu.c | 8 ++++++++
22 files changed, 186 insertions(+)

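As a reading aid before the diff: a minimal sketch of how generic code can consume the new hook once every target populates it. The wrapper name below is illustrative only and is not part of this patch; CPUClass::get_pc and its semantics are.

  static vaddr cpu_get_pc_sketch(CPUState *cs)
  {
      CPUClass *cc = CPU_GET_CLASS(cs);

      /* Every target's class_init below fills in get_pc, matching what
         cpu_get_tb_cpu_state would report, so callers can rely on it. */
      g_assert(cc->get_pc);
      return cc->get_pc(cs);
  }
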
diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h
58
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
13
index XXXXXXX..XXXXXXX 100644
59
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/arm/tcg-target-con-set.h
60
--- a/include/hw/core/cpu.h
15
+++ b/tcg/arm/tcg-target-con-set.h
61
+++ b/include/hw/core/cpu.h
16
@@ -XXX,XX +XXX,XX @@ C_O0_I4(s, s, s, s)
62
@@ -XXX,XX +XXX,XX @@ struct SysemuCPUOps;
17
C_O1_I1(r, l)
63
* If the target behaviour here is anything other than "set
18
C_O1_I1(r, r)
64
* the PC register to the value passed in" then the target must
19
C_O1_I1(w, r)
65
* also implement the synchronize_from_tb hook.
20
+C_O1_I1(w, w)
66
+ * @get_pc: Callback for getting the Program Counter register.
21
C_O1_I1(w, wr)
67
+ * As above, with the semantics of the target architecture.
22
C_O1_I2(r, 0, rZ)
68
* @gdb_read_register: Callback for letting GDB read a register.
23
C_O1_I2(r, l, l)
69
* @gdb_write_register: Callback for letting GDB write a register.
24
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
70
* @gdb_adjust_breakpoint: Callback for adjusting the address of a
25
index XXXXXXX..XXXXXXX 100644
71
@@ -XXX,XX +XXX,XX @@ struct CPUClass {
26
--- a/tcg/arm/tcg-target.h
72
void (*dump_state)(CPUState *cpu, FILE *, int flags);
27
+++ b/tcg/arm/tcg-target.h
73
int64_t (*get_arch_id)(CPUState *cpu);
28
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
74
void (*set_pc)(CPUState *cpu, vaddr value);
29
#define TCG_TARGET_HAS_v128 use_neon_instructions
75
+ vaddr (*get_pc)(CPUState *cpu);
30
#define TCG_TARGET_HAS_v256 0
76
int (*gdb_read_register)(CPUState *cpu, GByteArray *buf, int reg);
31
77
int (*gdb_write_register)(CPUState *cpu, uint8_t *buf, int reg);
32
-#define TCG_TARGET_HAS_andc_vec 0
78
vaddr (*gdb_adjust_breakpoint)(CPUState *cpu, vaddr addr);
33
-#define TCG_TARGET_HAS_orc_vec 0
79
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
34
-#define TCG_TARGET_HAS_not_vec 0
80
index XXXXXXX..XXXXXXX 100644
35
-#define TCG_TARGET_HAS_neg_vec 0
81
--- a/target/alpha/cpu.c
36
-#define TCG_TARGET_HAS_abs_vec 0
82
+++ b/target/alpha/cpu.c
37
+#define TCG_TARGET_HAS_andc_vec 1
83
@@ -XXX,XX +XXX,XX @@ static void alpha_cpu_set_pc(CPUState *cs, vaddr value)
38
+#define TCG_TARGET_HAS_orc_vec 1
84
cpu->env.pc = value;
39
+#define TCG_TARGET_HAS_not_vec 1
85
}
40
+#define TCG_TARGET_HAS_neg_vec 1
86
41
+#define TCG_TARGET_HAS_abs_vec 1
87
+static vaddr alpha_cpu_get_pc(CPUState *cs)
42
#define TCG_TARGET_HAS_roti_vec 0
88
+{
43
#define TCG_TARGET_HAS_rots_vec 0
89
+ AlphaCPU *cpu = ALPHA_CPU(cs);
44
#define TCG_TARGET_HAS_rotv_vec 0
90
+
45
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
91
+ return cpu->env.pc;
46
index XXXXXXX..XXXXXXX 100644
92
+}
47
--- a/tcg/arm/tcg-target.c.inc
93
+
48
+++ b/tcg/arm/tcg-target.c.inc
94
+
49
@@ -XXX,XX +XXX,XX @@ typedef enum {
95
static bool alpha_cpu_has_work(CPUState *cs)
50
96
{
51
INSN_VADD = 0xf2000800,
97
/* Here we are checking to see if the CPU should wake up from HALT.
52
INSN_VAND = 0xf2000110,
98
@@ -XXX,XX +XXX,XX @@ static void alpha_cpu_class_init(ObjectClass *oc, void *data)
53
+ INSN_VBIC = 0xf2100110,
99
cc->has_work = alpha_cpu_has_work;
54
INSN_VEOR = 0xf3000110,
100
cc->dump_state = alpha_cpu_dump_state;
55
+ INSN_VORN = 0xf2300110,
101
cc->set_pc = alpha_cpu_set_pc;
56
INSN_VORR = 0xf2200110,
102
+ cc->get_pc = alpha_cpu_get_pc;
57
INSN_VSUB = 0xf3000800,
103
cc->gdb_read_register = alpha_cpu_gdb_read_register;
58
104
cc->gdb_write_register = alpha_cpu_gdb_write_register;
59
+ INSN_VABS = 0xf3b10300,
105
#ifndef CONFIG_USER_ONLY
60
INSN_VMVN = 0xf3b00580,
106
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
61
+ INSN_VNEG = 0xf3b10380,
107
index XXXXXXX..XXXXXXX 100644
62
108
--- a/target/arm/cpu.c
63
INSN_VCEQ0 = 0xf3b10100,
109
+++ b/target/arm/cpu.c
64
INSN_VCGT0 = 0xf3b10000,
110
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_set_pc(CPUState *cs, vaddr value)
65
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
111
}
66
return C_O1_I1(w, r);
112
}
67
case INDEX_op_dup_vec:
113
68
return C_O1_I1(w, wr);
114
+static vaddr arm_cpu_get_pc(CPUState *cs)
69
+ case INDEX_op_abs_vec:
115
+{
70
+ case INDEX_op_neg_vec:
116
+ ARMCPU *cpu = ARM_CPU(cs);
71
+ case INDEX_op_not_vec:
117
+ CPUARMState *env = &cpu->env;
72
+ return C_O1_I1(w, w);
118
+
73
case INDEX_op_dup2_vec:
119
+ if (is_a64(env)) {
74
case INDEX_op_add_vec:
120
+ return env->pc;
75
case INDEX_op_sub_vec:
121
+ } else {
76
case INDEX_op_xor_vec:
122
+ return env->regs[15];
77
return C_O1_I2(w, w, w);
123
+ }
78
case INDEX_op_or_vec:
124
+}
79
+ case INDEX_op_andc_vec:
125
+
80
return C_O1_I2(w, w, wO);
126
#ifdef CONFIG_TCG
81
case INDEX_op_and_vec:
127
void arm_cpu_synchronize_from_tb(CPUState *cs,
82
+ case INDEX_op_orc_vec:
128
const TranslationBlock *tb)
83
return C_O1_I2(w, w, wV);
129
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
84
case INDEX_op_cmp_vec:
130
cc->has_work = arm_cpu_has_work;
85
return C_O1_I2(w, w, wZ);
131
cc->dump_state = arm_cpu_dump_state;
86
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
132
cc->set_pc = arm_cpu_set_pc;
87
case INDEX_op_dup2_vec:
133
+ cc->get_pc = arm_cpu_get_pc;
88
tcg_out_dup2_vec(s, a0, a1, a2);
134
cc->gdb_read_register = arm_cpu_gdb_read_register;
89
return;
135
cc->gdb_write_register = arm_cpu_gdb_write_register;
90
+ case INDEX_op_abs_vec:
136
#ifndef CONFIG_USER_ONLY
91
+ tcg_out_vreg2(s, INSN_VABS, q, vece, a0, a1);
137
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
92
+ return;
138
index XXXXXXX..XXXXXXX 100644
93
+ case INDEX_op_neg_vec:
139
--- a/target/avr/cpu.c
94
+ tcg_out_vreg2(s, INSN_VNEG, q, vece, a0, a1);
140
+++ b/target/avr/cpu.c
95
+ return;
141
@@ -XXX,XX +XXX,XX @@ static void avr_cpu_set_pc(CPUState *cs, vaddr value)
96
+ case INDEX_op_not_vec:
142
cpu->env.pc_w = value / 2; /* internally PC points to words */
97
+ tcg_out_vreg2(s, INSN_VMVN, q, 0, a0, a1);
143
}
98
+ return;
144
99
case INDEX_op_add_vec:
145
+static vaddr avr_cpu_get_pc(CPUState *cs)
100
tcg_out_vreg3(s, INSN_VADD, q, vece, a0, a1, a2);
146
+{
101
return;
147
+ AVRCPU *cpu = AVR_CPU(cs);
102
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
148
+
103
tcg_out_vreg3(s, INSN_VEOR, q, 0, a0, a1, a2);
149
+ return cpu->env.pc_w * 2;
104
return;
150
+}
105
151
+
106
+ case INDEX_op_andc_vec:
152
static bool avr_cpu_has_work(CPUState *cs)
107
+ if (!const_args[2]) {
153
{
108
+ tcg_out_vreg3(s, INSN_VBIC, q, 0, a0, a1, a2);
154
AVRCPU *cpu = AVR_CPU(cs);
109
+ return;
155
@@ -XXX,XX +XXX,XX @@ static void avr_cpu_class_init(ObjectClass *oc, void *data)
110
+ }
156
cc->has_work = avr_cpu_has_work;
111
+ a2 = ~a2;
157
cc->dump_state = avr_cpu_dump_state;
112
+ /* fall through */
158
cc->set_pc = avr_cpu_set_pc;
113
case INDEX_op_and_vec:
159
+ cc->get_pc = avr_cpu_get_pc;
114
if (const_args[2]) {
160
dc->vmsd = &vms_avr_cpu;
115
is_shimm1632(~a2, &cmode, &imm8);
161
cc->sysemu_ops = &avr_sysemu_ops;
116
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
162
cc->disas_set_info = avr_cpu_disas_set_info;
117
tcg_out_vreg3(s, INSN_VAND, q, 0, a0, a1, a2);
163
diff --git a/target/cris/cpu.c b/target/cris/cpu.c
118
return;
164
index XXXXXXX..XXXXXXX 100644
119
165
--- a/target/cris/cpu.c
120
+ case INDEX_op_orc_vec:
166
+++ b/target/cris/cpu.c
121
+ if (!const_args[2]) {
167
@@ -XXX,XX +XXX,XX @@ static void cris_cpu_set_pc(CPUState *cs, vaddr value)
122
+ tcg_out_vreg3(s, INSN_VORN, q, 0, a0, a1, a2);
168
cpu->env.pc = value;
123
+ return;
169
}
124
+ }
170
125
+ a2 = ~a2;
171
+static vaddr cris_cpu_get_pc(CPUState *cs)
126
+ /* fall through */
172
+{
127
case INDEX_op_or_vec:
173
+ CRISCPU *cpu = CRIS_CPU(cs);
128
if (const_args[2]) {
174
+
129
is_shimm1632(a2, &cmode, &imm8);
175
+ return cpu->env.pc;
130
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
176
+}
131
case INDEX_op_add_vec:
177
+
132
case INDEX_op_sub_vec:
178
static bool cris_cpu_has_work(CPUState *cs)
133
case INDEX_op_and_vec:
179
{
134
+ case INDEX_op_andc_vec:
180
return cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI);
135
case INDEX_op_or_vec:
181
@@ -XXX,XX +XXX,XX @@ static void cris_cpu_class_init(ObjectClass *oc, void *data)
136
+ case INDEX_op_orc_vec:
182
cc->has_work = cris_cpu_has_work;
137
case INDEX_op_xor_vec:
183
cc->dump_state = cris_cpu_dump_state;
138
+ case INDEX_op_not_vec:
184
cc->set_pc = cris_cpu_set_pc;
139
return 1;
185
+ cc->get_pc = cris_cpu_get_pc;
140
+ case INDEX_op_abs_vec:
186
cc->gdb_read_register = cris_cpu_gdb_read_register;
141
case INDEX_op_cmp_vec:
187
cc->gdb_write_register = cris_cpu_gdb_write_register;
142
+ case INDEX_op_neg_vec:
188
#ifndef CONFIG_USER_ONLY
143
return vece < MO_64;
189
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
144
default:
190
index XXXXXXX..XXXXXXX 100644
145
return 0;
191
--- a/target/hexagon/cpu.c
192
+++ b/target/hexagon/cpu.c
193
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_set_pc(CPUState *cs, vaddr value)
194
env->gpr[HEX_REG_PC] = value;
195
}
196
197
+static vaddr hexagon_cpu_get_pc(CPUState *cs)
198
+{
199
+ HexagonCPU *cpu = HEXAGON_CPU(cs);
200
+ CPUHexagonState *env = &cpu->env;
201
+ return env->gpr[HEX_REG_PC];
202
+}
203
+
204
static void hexagon_cpu_synchronize_from_tb(CPUState *cs,
205
const TranslationBlock *tb)
206
{
207
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_class_init(ObjectClass *c, void *data)
208
cc->has_work = hexagon_cpu_has_work;
209
cc->dump_state = hexagon_dump_state;
210
cc->set_pc = hexagon_cpu_set_pc;
211
+ cc->get_pc = hexagon_cpu_get_pc;
212
cc->gdb_read_register = hexagon_gdb_read_register;
213
cc->gdb_write_register = hexagon_gdb_write_register;
214
cc->gdb_num_core_regs = TOTAL_PER_THREAD_REGS + NUM_VREGS + NUM_QREGS;
215
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
216
index XXXXXXX..XXXXXXX 100644
217
--- a/target/hppa/cpu.c
218
+++ b/target/hppa/cpu.c
219
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_set_pc(CPUState *cs, vaddr value)
220
cpu->env.iaoq_b = value + 4;
221
}
222
223
+static vaddr hppa_cpu_get_pc(CPUState *cs)
224
+{
225
+ HPPACPU *cpu = HPPA_CPU(cs);
226
+
227
+ return cpu->env.iaoq_f;
228
+}
229
+
230
static void hppa_cpu_synchronize_from_tb(CPUState *cs,
231
const TranslationBlock *tb)
232
{
233
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_class_init(ObjectClass *oc, void *data)
234
cc->has_work = hppa_cpu_has_work;
235
cc->dump_state = hppa_cpu_dump_state;
236
cc->set_pc = hppa_cpu_set_pc;
237
+ cc->get_pc = hppa_cpu_get_pc;
238
cc->gdb_read_register = hppa_cpu_gdb_read_register;
239
cc->gdb_write_register = hppa_cpu_gdb_write_register;
240
#ifndef CONFIG_USER_ONLY
241
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
242
index XXXXXXX..XXXXXXX 100644
243
--- a/target/i386/cpu.c
244
+++ b/target/i386/cpu.c
245
@@ -XXX,XX +XXX,XX @@ static void x86_cpu_set_pc(CPUState *cs, vaddr value)
246
cpu->env.eip = value;
247
}
248
249
+static vaddr x86_cpu_get_pc(CPUState *cs)
250
+{
251
+ X86CPU *cpu = X86_CPU(cs);
252
+
253
+ /* Match cpu_get_tb_cpu_state. */
254
+ return cpu->env.eip + cpu->env.segs[R_CS].base;
255
+}
256
+
257
int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request)
258
{
259
X86CPU *cpu = X86_CPU(cs);
260
@@ -XXX,XX +XXX,XX @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
261
cc->has_work = x86_cpu_has_work;
262
cc->dump_state = x86_cpu_dump_state;
263
cc->set_pc = x86_cpu_set_pc;
264
+ cc->get_pc = x86_cpu_get_pc;
265
cc->gdb_read_register = x86_cpu_gdb_read_register;
266
cc->gdb_write_register = x86_cpu_gdb_write_register;
267
cc->get_arch_id = x86_cpu_get_arch_id;
268
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
269
index XXXXXXX..XXXXXXX 100644
270
--- a/target/loongarch/cpu.c
271
+++ b/target/loongarch/cpu.c
272
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_set_pc(CPUState *cs, vaddr value)
273
env->pc = value;
274
}
275
276
+static vaddr loongarch_cpu_get_pc(CPUState *cs)
277
+{
278
+ LoongArchCPU *cpu = LOONGARCH_CPU(cs);
279
+ CPULoongArchState *env = &cpu->env;
280
+
281
+ return env->pc;
282
+}
283
+
284
#ifndef CONFIG_USER_ONLY
285
#include "hw/loongarch/virt.h"
286
287
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_class_init(ObjectClass *c, void *data)
288
cc->has_work = loongarch_cpu_has_work;
289
cc->dump_state = loongarch_cpu_dump_state;
290
cc->set_pc = loongarch_cpu_set_pc;
291
+ cc->get_pc = loongarch_cpu_get_pc;
292
#ifndef CONFIG_USER_ONLY
293
dc->vmsd = &vmstate_loongarch_cpu;
294
cc->sysemu_ops = &loongarch_sysemu_ops;
295
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
296
index XXXXXXX..XXXXXXX 100644
297
--- a/target/m68k/cpu.c
298
+++ b/target/m68k/cpu.c
299
@@ -XXX,XX +XXX,XX @@ static void m68k_cpu_set_pc(CPUState *cs, vaddr value)
300
cpu->env.pc = value;
301
}
302
303
+static vaddr m68k_cpu_get_pc(CPUState *cs)
304
+{
305
+ M68kCPU *cpu = M68K_CPU(cs);
306
+
307
+ return cpu->env.pc;
308
+}
309
+
310
static bool m68k_cpu_has_work(CPUState *cs)
311
{
312
return cs->interrupt_request & CPU_INTERRUPT_HARD;
313
@@ -XXX,XX +XXX,XX @@ static void m68k_cpu_class_init(ObjectClass *c, void *data)
314
cc->has_work = m68k_cpu_has_work;
315
cc->dump_state = m68k_cpu_dump_state;
316
cc->set_pc = m68k_cpu_set_pc;
317
+ cc->get_pc = m68k_cpu_get_pc;
318
cc->gdb_read_register = m68k_cpu_gdb_read_register;
319
cc->gdb_write_register = m68k_cpu_gdb_write_register;
320
#if defined(CONFIG_SOFTMMU)
321
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
322
index XXXXXXX..XXXXXXX 100644
323
--- a/target/microblaze/cpu.c
324
+++ b/target/microblaze/cpu.c
325
@@ -XXX,XX +XXX,XX @@ static void mb_cpu_set_pc(CPUState *cs, vaddr value)
326
cpu->env.iflags = 0;
327
}
328
329
+static vaddr mb_cpu_get_pc(CPUState *cs)
330
+{
331
+ MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
332
+
333
+ return cpu->env.pc;
334
+}
335
+
336
static void mb_cpu_synchronize_from_tb(CPUState *cs,
337
const TranslationBlock *tb)
338
{
339
@@ -XXX,XX +XXX,XX @@ static void mb_cpu_class_init(ObjectClass *oc, void *data)
340
341
cc->dump_state = mb_cpu_dump_state;
342
cc->set_pc = mb_cpu_set_pc;
343
+ cc->get_pc = mb_cpu_get_pc;
344
cc->gdb_read_register = mb_cpu_gdb_read_register;
345
cc->gdb_write_register = mb_cpu_gdb_write_register;
346
347
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
348
index XXXXXXX..XXXXXXX 100644
349
--- a/target/mips/cpu.c
350
+++ b/target/mips/cpu.c
351
@@ -XXX,XX +XXX,XX @@ static void mips_cpu_set_pc(CPUState *cs, vaddr value)
352
mips_env_set_pc(&cpu->env, value);
353
}
354
355
+static vaddr mips_cpu_get_pc(CPUState *cs)
356
+{
357
+ MIPSCPU *cpu = MIPS_CPU(cs);
358
+
359
+ return cpu->env.active_tc.PC;
360
+}
361
+
362
static bool mips_cpu_has_work(CPUState *cs)
363
{
364
MIPSCPU *cpu = MIPS_CPU(cs);
365
@@ -XXX,XX +XXX,XX @@ static void mips_cpu_class_init(ObjectClass *c, void *data)
366
cc->has_work = mips_cpu_has_work;
367
cc->dump_state = mips_cpu_dump_state;
368
cc->set_pc = mips_cpu_set_pc;
369
+ cc->get_pc = mips_cpu_get_pc;
370
cc->gdb_read_register = mips_cpu_gdb_read_register;
371
cc->gdb_write_register = mips_cpu_gdb_write_register;
372
#ifndef CONFIG_USER_ONLY
373
diff --git a/target/nios2/cpu.c b/target/nios2/cpu.c
374
index XXXXXXX..XXXXXXX 100644
375
--- a/target/nios2/cpu.c
376
+++ b/target/nios2/cpu.c
377
@@ -XXX,XX +XXX,XX @@ static void nios2_cpu_set_pc(CPUState *cs, vaddr value)
378
env->pc = value;
379
}
380
381
+static vaddr nios2_cpu_get_pc(CPUState *cs)
382
+{
383
+ Nios2CPU *cpu = NIOS2_CPU(cs);
384
+ CPUNios2State *env = &cpu->env;
385
+
386
+ return env->pc;
387
+}
388
+
389
static bool nios2_cpu_has_work(CPUState *cs)
390
{
391
return cs->interrupt_request & CPU_INTERRUPT_HARD;
392
@@ -XXX,XX +XXX,XX @@ static void nios2_cpu_class_init(ObjectClass *oc, void *data)
393
cc->has_work = nios2_cpu_has_work;
394
cc->dump_state = nios2_cpu_dump_state;
395
cc->set_pc = nios2_cpu_set_pc;
396
+ cc->get_pc = nios2_cpu_get_pc;
397
cc->disas_set_info = nios2_cpu_disas_set_info;
398
#ifndef CONFIG_USER_ONLY
399
cc->sysemu_ops = &nios2_sysemu_ops;
400
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
401
index XXXXXXX..XXXXXXX 100644
402
--- a/target/openrisc/cpu.c
403
+++ b/target/openrisc/cpu.c
404
@@ -XXX,XX +XXX,XX @@ static void openrisc_cpu_set_pc(CPUState *cs, vaddr value)
405
cpu->env.dflag = 0;
406
}
407
408
+static vaddr openrisc_cpu_get_pc(CPUState *cs)
409
+{
410
+ OpenRISCCPU *cpu = OPENRISC_CPU(cs);
411
+
412
+ return cpu->env.pc;
413
+}
414
+
415
static void openrisc_cpu_synchronize_from_tb(CPUState *cs,
416
const TranslationBlock *tb)
417
{
418
@@ -XXX,XX +XXX,XX @@ static void openrisc_cpu_class_init(ObjectClass *oc, void *data)
419
cc->has_work = openrisc_cpu_has_work;
420
cc->dump_state = openrisc_cpu_dump_state;
421
cc->set_pc = openrisc_cpu_set_pc;
422
+ cc->get_pc = openrisc_cpu_get_pc;
423
cc->gdb_read_register = openrisc_cpu_gdb_read_register;
424
cc->gdb_write_register = openrisc_cpu_gdb_write_register;
425
#ifndef CONFIG_USER_ONLY
426
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
427
index XXXXXXX..XXXXXXX 100644
428
--- a/target/ppc/cpu_init.c
429
+++ b/target/ppc/cpu_init.c
430
@@ -XXX,XX +XXX,XX @@ static void ppc_cpu_set_pc(CPUState *cs, vaddr value)
431
cpu->env.nip = value;
432
}
433
434
+static vaddr ppc_cpu_get_pc(CPUState *cs)
435
+{
436
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
437
+
438
+ return cpu->env.nip;
439
+}
440
+
441
static bool ppc_cpu_has_work(CPUState *cs)
442
{
443
PowerPCCPU *cpu = POWERPC_CPU(cs);
444
@@ -XXX,XX +XXX,XX @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data)
445
cc->has_work = ppc_cpu_has_work;
446
cc->dump_state = ppc_cpu_dump_state;
447
cc->set_pc = ppc_cpu_set_pc;
448
+ cc->get_pc = ppc_cpu_get_pc;
449
cc->gdb_read_register = ppc_cpu_gdb_read_register;
450
cc->gdb_write_register = ppc_cpu_gdb_write_register;
451
#ifndef CONFIG_USER_ONLY
452
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
453
index XXXXXXX..XXXXXXX 100644
454
--- a/target/riscv/cpu.c
455
+++ b/target/riscv/cpu.c
456
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_set_pc(CPUState *cs, vaddr value)
457
}
458
}
459
460
+static vaddr riscv_cpu_get_pc(CPUState *cs)
461
+{
462
+ RISCVCPU *cpu = RISCV_CPU(cs);
463
+ CPURISCVState *env = &cpu->env;
464
+
465
+ /* Match cpu_get_tb_cpu_state. */
466
+ if (env->xl == MXL_RV32) {
467
+ return env->pc & UINT32_MAX;
468
+ }
469
+ return env->pc;
470
+}
471
+
472
static void riscv_cpu_synchronize_from_tb(CPUState *cs,
473
const TranslationBlock *tb)
474
{
475
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_class_init(ObjectClass *c, void *data)
476
cc->has_work = riscv_cpu_has_work;
477
cc->dump_state = riscv_cpu_dump_state;
478
cc->set_pc = riscv_cpu_set_pc;
479
+ cc->get_pc = riscv_cpu_get_pc;
480
cc->gdb_read_register = riscv_cpu_gdb_read_register;
481
cc->gdb_write_register = riscv_cpu_gdb_write_register;
482
cc->gdb_num_core_regs = 33;
483
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
484
index XXXXXXX..XXXXXXX 100644
485
--- a/target/rx/cpu.c
486
+++ b/target/rx/cpu.c
487
@@ -XXX,XX +XXX,XX @@ static void rx_cpu_set_pc(CPUState *cs, vaddr value)
488
cpu->env.pc = value;
489
}
490
491
+static vaddr rx_cpu_get_pc(CPUState *cs)
492
+{
493
+ RXCPU *cpu = RX_CPU(cs);
494
+
495
+ return cpu->env.pc;
496
+}
497
+
498
static void rx_cpu_synchronize_from_tb(CPUState *cs,
499
const TranslationBlock *tb)
500
{
501
@@ -XXX,XX +XXX,XX @@ static void rx_cpu_class_init(ObjectClass *klass, void *data)
502
cc->has_work = rx_cpu_has_work;
503
cc->dump_state = rx_cpu_dump_state;
504
cc->set_pc = rx_cpu_set_pc;
505
+ cc->get_pc = rx_cpu_get_pc;
506
507
#ifndef CONFIG_USER_ONLY
508
cc->sysemu_ops = &rx_sysemu_ops;
509
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
510
index XXXXXXX..XXXXXXX 100644
511
--- a/target/s390x/cpu.c
512
+++ b/target/s390x/cpu.c
513
@@ -XXX,XX +XXX,XX @@ static void s390_cpu_set_pc(CPUState *cs, vaddr value)
514
cpu->env.psw.addr = value;
515
}
516
517
+static vaddr s390_cpu_get_pc(CPUState *cs)
518
+{
519
+ S390CPU *cpu = S390_CPU(cs);
520
+
521
+ return cpu->env.psw.addr;
522
+}
523
+
524
static bool s390_cpu_has_work(CPUState *cs)
525
{
526
S390CPU *cpu = S390_CPU(cs);
527
@@ -XXX,XX +XXX,XX @@ static void s390_cpu_class_init(ObjectClass *oc, void *data)
528
cc->has_work = s390_cpu_has_work;
529
cc->dump_state = s390_cpu_dump_state;
530
cc->set_pc = s390_cpu_set_pc;
531
+ cc->get_pc = s390_cpu_get_pc;
532
cc->gdb_read_register = s390_cpu_gdb_read_register;
533
cc->gdb_write_register = s390_cpu_gdb_write_register;
534
#ifndef CONFIG_USER_ONLY
535
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
536
index XXXXXXX..XXXXXXX 100644
537
--- a/target/sh4/cpu.c
538
+++ b/target/sh4/cpu.c
539
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_set_pc(CPUState *cs, vaddr value)
540
cpu->env.pc = value;
541
}
542
543
+static vaddr superh_cpu_get_pc(CPUState *cs)
544
+{
545
+ SuperHCPU *cpu = SUPERH_CPU(cs);
546
+
547
+ return cpu->env.pc;
548
+}
549
+
550
static void superh_cpu_synchronize_from_tb(CPUState *cs,
551
const TranslationBlock *tb)
552
{
553
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_class_init(ObjectClass *oc, void *data)
554
cc->has_work = superh_cpu_has_work;
555
cc->dump_state = superh_cpu_dump_state;
556
cc->set_pc = superh_cpu_set_pc;
557
+ cc->get_pc = superh_cpu_get_pc;
558
cc->gdb_read_register = superh_cpu_gdb_read_register;
559
cc->gdb_write_register = superh_cpu_gdb_write_register;
560
#ifndef CONFIG_USER_ONLY
561
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
562
index XXXXXXX..XXXXXXX 100644
563
--- a/target/sparc/cpu.c
564
+++ b/target/sparc/cpu.c
565
@@ -XXX,XX +XXX,XX @@ static void sparc_cpu_set_pc(CPUState *cs, vaddr value)
566
cpu->env.npc = value + 4;
567
}
568
569
+static vaddr sparc_cpu_get_pc(CPUState *cs)
570
+{
571
+ SPARCCPU *cpu = SPARC_CPU(cs);
572
+
573
+ return cpu->env.pc;
574
+}
575
+
576
static void sparc_cpu_synchronize_from_tb(CPUState *cs,
577
const TranslationBlock *tb)
578
{
579
@@ -XXX,XX +XXX,XX @@ static void sparc_cpu_class_init(ObjectClass *oc, void *data)
580
cc->memory_rw_debug = sparc_cpu_memory_rw_debug;
581
#endif
582
cc->set_pc = sparc_cpu_set_pc;
583
+ cc->get_pc = sparc_cpu_get_pc;
584
cc->gdb_read_register = sparc_cpu_gdb_read_register;
585
cc->gdb_write_register = sparc_cpu_gdb_write_register;
586
#ifndef CONFIG_USER_ONLY
587
diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
588
index XXXXXXX..XXXXXXX 100644
589
--- a/target/tricore/cpu.c
590
+++ b/target/tricore/cpu.c
591
@@ -XXX,XX +XXX,XX @@ static void tricore_cpu_set_pc(CPUState *cs, vaddr value)
592
env->PC = value & ~(target_ulong)1;
593
}
594
595
+static vaddr tricore_cpu_get_pc(CPUState *cs)
596
+{
597
+ TriCoreCPU *cpu = TRICORE_CPU(cs);
598
+ CPUTriCoreState *env = &cpu->env;
599
+
600
+ return env->PC;
601
+}
602
+
603
static void tricore_cpu_synchronize_from_tb(CPUState *cs,
604
const TranslationBlock *tb)
605
{
606
@@ -XXX,XX +XXX,XX @@ static void tricore_cpu_class_init(ObjectClass *c, void *data)
607
608
cc->dump_state = tricore_cpu_dump_state;
609
cc->set_pc = tricore_cpu_set_pc;
610
+ cc->get_pc = tricore_cpu_get_pc;
611
cc->sysemu_ops = &tricore_sysemu_ops;
612
cc->tcg_ops = &tricore_tcg_ops;
613
}
614
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
615
index XXXXXXX..XXXXXXX 100644
616
--- a/target/xtensa/cpu.c
617
+++ b/target/xtensa/cpu.c
618
@@ -XXX,XX +XXX,XX @@ static void xtensa_cpu_set_pc(CPUState *cs, vaddr value)
619
cpu->env.pc = value;
620
}
621
622
+static vaddr xtensa_cpu_get_pc(CPUState *cs)
623
+{
624
+ XtensaCPU *cpu = XTENSA_CPU(cs);
625
+
626
+ return cpu->env.pc;
627
+}
628
+
629
static bool xtensa_cpu_has_work(CPUState *cs)
630
{
631
#ifndef CONFIG_USER_ONLY
632
@@ -XXX,XX +XXX,XX @@ static void xtensa_cpu_class_init(ObjectClass *oc, void *data)
633
cc->has_work = xtensa_cpu_has_work;
634
cc->dump_state = xtensa_cpu_dump_state;
635
cc->set_pc = xtensa_cpu_set_pc;
636
+ cc->get_pc = xtensa_cpu_get_pc;
637
cc->gdb_read_register = xtensa_cpu_gdb_read_register;
638
cc->gdb_write_register = xtensa_cpu_gdb_write_register;
639
cc->gdb_stop_before_watchpoint = true;
--
2.25.1

--
2.34.1

diff view generated by jsdifflib
1
Change the return value to bool, because that's what it should
have been from the start. Pass the ct mask instead of the whole
TCGArgConstraint, as that's the only part that's relevant.

Change the value argument to int64_t. We will need the extra
width for 32-bit hosts wanting to match vector constants.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tcg.c | 5 ++---
tcg/aarch64/tcg-target.c.inc | 5 +----
tcg/arm/tcg-target.c.inc | 5 +----
tcg/i386/tcg-target.c.inc | 4 +---
tcg/mips/tcg-target.c.inc | 5 +----
tcg/ppc/tcg-target.c.inc | 4 +---
tcg/riscv/tcg-target.c.inc | 4 +---
tcg/s390/tcg-target.c.inc | 5 +----
tcg/sparc/tcg-target.c.inc | 5 +----
tcg/tci/tcg-target.c.inc | 6 ++----
10 files changed, 12 insertions(+), 36 deletions(-)

The availability of tb->pc will shortly be conditional.
Introduce accessor functions to minimize ifdefs.

Pass around a known pc to places like tcg_gen_code,
where the caller must already have the value.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/internal.h | 6 ++++
include/exec/exec-all.h | 6 ++++
include/tcg/tcg.h | 2 +-
accel/tcg/cpu-exec.c | 46 ++++++++++++++-----------
accel/tcg/translate-all.c | 37 +++++++++++---------
target/arm/cpu.c | 4 +--
target/avr/cpu.c | 2 +-
target/hexagon/cpu.c | 2 +-
target/hppa/cpu.c | 4 +--
target/i386/tcg/tcg-cpu.c | 2 +-
target/loongarch/cpu.c | 2 +-
target/microblaze/cpu.c | 2 +-
target/mips/tcg/exception.c | 2 +-
target/mips/tcg/sysemu/special_helper.c | 2 +-
target/openrisc/cpu.c | 2 +-
target/riscv/cpu.c | 4 +--
target/rx/cpu.c | 2 +-
target/sh4/cpu.c | 4 +--
target/sparc/cpu.c | 2 +-
target/tricore/cpu.c | 2 +-
tcg/tcg.c | 8 ++---
21 files changed, 82 insertions(+), 61 deletions(-)
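As a reading aid (the tcg_target_const_match hunks themselves are not shown at this point): the first description above implies a per-backend hook of roughly the following shape. The parameter names and the body are a sketch based on that description, not copied from the patch.

  static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
  {
      if (ct & TCG_CT_CONST) {
          return true;    /* an unrestricted constant constraint matches anything */
      }
      /* ... backend-specific tests on 'val' against the remaining ct bits ... */
      return false;
  }
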
22
32
33
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
34
index XXXXXXX..XXXXXXX 100644
35
--- a/accel/tcg/internal.h
36
+++ b/accel/tcg/internal.h
37
@@ -XXX,XX +XXX,XX @@ G_NORETURN void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
38
void page_init(void);
39
void tb_htable_init(void);
40
41
+/* Return the current PC from CPU, which may be cached in TB. */
42
+static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
43
+{
44
+ return tb_pc(tb);
45
+}
46
+
47
#endif /* ACCEL_TCG_INTERNAL_H */
48
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
49
index XXXXXXX..XXXXXXX 100644
50
--- a/include/exec/exec-all.h
51
+++ b/include/exec/exec-all.h
52
@@ -XXX,XX +XXX,XX @@ struct TranslationBlock {
53
uintptr_t jmp_dest[2];
54
};
55
56
+/* Hide the read to avoid ifdefs for TARGET_TB_PCREL. */
57
+static inline target_ulong tb_pc(const TranslationBlock *tb)
58
+{
59
+ return tb->pc;
60
+}
61
+
62
/* Hide the qatomic_read to make code a little easier on the eyes */
63
static inline uint32_t tb_cflags(const TranslationBlock *tb)
64
{
65
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
66
index XXXXXXX..XXXXXXX 100644
67
--- a/include/tcg/tcg.h
68
+++ b/include/tcg/tcg.h
69
@@ -XXX,XX +XXX,XX @@ void tcg_register_thread(void);
70
void tcg_prologue_init(TCGContext *s);
71
void tcg_func_start(TCGContext *s);
72
73
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb);
74
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start);
75
76
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size);
77
78
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
79
index XXXXXXX..XXXXXXX 100644
80
--- a/accel/tcg/cpu-exec.c
81
+++ b/accel/tcg/cpu-exec.c
82
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
83
const TranslationBlock *tb = p;
84
const struct tb_desc *desc = d;
85
86
- if (tb->pc == desc->pc &&
87
+ if (tb_pc(tb) == desc->pc &&
88
tb->page_addr[0] == desc->page_addr0 &&
89
tb->cs_base == desc->cs_base &&
90
tb->flags == desc->flags &&
91
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
92
return tb;
93
}
94
95
-static inline void log_cpu_exec(target_ulong pc, CPUState *cpu,
96
- const TranslationBlock *tb)
97
+static void log_cpu_exec(target_ulong pc, CPUState *cpu,
98
+ const TranslationBlock *tb)
99
{
100
- if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC))
101
- && qemu_log_in_addr_range(pc)) {
102
-
103
+ if (qemu_log_in_addr_range(pc)) {
104
qemu_log_mask(CPU_LOG_EXEC,
105
"Trace %d: %p [" TARGET_FMT_lx
106
"/" TARGET_FMT_lx "/%08x/%08x] %s\n",
107
@@ -XXX,XX +XXX,XX @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
108
return tcg_code_gen_epilogue;
109
}
110
111
- log_cpu_exec(pc, cpu, tb);
112
+ if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
113
+ log_cpu_exec(pc, cpu, tb);
114
+ }
115
116
return tb->tc.ptr;
117
}
118
@@ -XXX,XX +XXX,XX @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
119
TranslationBlock *last_tb;
120
const void *tb_ptr = itb->tc.ptr;
121
122
- log_cpu_exec(itb->pc, cpu, itb);
123
+ if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
124
+ log_cpu_exec(log_pc(cpu, itb), cpu, itb);
125
+ }
126
127
qemu_thread_jit_execute();
128
ret = tcg_qemu_tb_exec(env, tb_ptr);
129
@@ -XXX,XX +XXX,XX @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
130
* of the start of the TB.
131
*/
132
CPUClass *cc = CPU_GET_CLASS(cpu);
133
- qemu_log_mask_and_addr(CPU_LOG_EXEC, last_tb->pc,
134
- "Stopped execution of TB chain before %p ["
135
- TARGET_FMT_lx "] %s\n",
136
- last_tb->tc.ptr, last_tb->pc,
137
- lookup_symbol(last_tb->pc));
138
+
139
if (cc->tcg_ops->synchronize_from_tb) {
140
cc->tcg_ops->synchronize_from_tb(cpu, last_tb);
141
} else {
142
assert(cc->set_pc);
143
- cc->set_pc(cpu, last_tb->pc);
144
+ cc->set_pc(cpu, tb_pc(last_tb));
145
+ }
146
+ if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
147
+ target_ulong pc = log_pc(cpu, last_tb);
148
+ if (qemu_log_in_addr_range(pc)) {
149
+ qemu_log("Stopped execution of TB chain before %p ["
150
+ TARGET_FMT_lx "] %s\n",
151
+ last_tb->tc.ptr, pc, lookup_symbol(pc));
152
+ }
153
}
154
}
155
156
@@ -XXX,XX +XXX,XX @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
157
158
qemu_spin_unlock(&tb_next->jmp_lock);
159
160
- qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
161
- "Linking TBs %p [" TARGET_FMT_lx
162
- "] index %d -> %p [" TARGET_FMT_lx "]\n",
163
- tb->tc.ptr, tb->pc, n,
164
- tb_next->tc.ptr, tb_next->pc);
165
+ qemu_log_mask(CPU_LOG_EXEC, "Linking TBs %p index %d -> %p\n",
166
+ tb->tc.ptr, n, tb_next->tc.ptr);
167
return;
168
169
out_unlock_next:
170
@@ -XXX,XX +XXX,XX @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
171
}
172
173
static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
174
+ target_ulong pc,
175
TranslationBlock **last_tb, int *tb_exit)
176
{
177
int32_t insns_left;
178
179
- trace_exec_tb(tb, tb->pc);
180
+ trace_exec_tb(tb, pc);
181
tb = cpu_tb_exec(cpu, tb, tb_exit);
182
if (*tb_exit != TB_EXIT_REQUESTED) {
183
*last_tb = tb;
184
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
185
tb_add_jump(last_tb, tb_exit, tb);
186
}
187
188
- cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit);
189
+ cpu_loop_exec_tb(cpu, tb, pc, &last_tb, &tb_exit);
190
191
/* Try to align the host and virtual clocks
192
if the guest is in advance */
193
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
194
index XXXXXXX..XXXXXXX 100644
195
--- a/accel/tcg/translate-all.c
196
+++ b/accel/tcg/translate-all.c
197
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
198
199
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
200
if (i == 0) {
201
- prev = (j == 0 ? tb->pc : 0);
202
+ prev = (j == 0 ? tb_pc(tb) : 0);
203
} else {
204
prev = tcg_ctx->gen_insn_data[i - 1][j];
205
}
206
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
207
static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
208
uintptr_t searched_pc, bool reset_icount)
209
{
210
- target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
211
+ target_ulong data[TARGET_INSN_START_WORDS] = { tb_pc(tb) };
212
uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
213
CPUArchState *env = cpu->env_ptr;
214
const uint8_t *p = tb->tc.ptr + tb->tc.size;
215
@@ -XXX,XX +XXX,XX @@ static bool tb_cmp(const void *ap, const void *bp)
216
const TranslationBlock *a = ap;
217
const TranslationBlock *b = bp;
218
219
- return a->pc == b->pc &&
220
+ return tb_pc(a) == tb_pc(b) &&
221
a->cs_base == b->cs_base &&
222
a->flags == b->flags &&
223
(tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
224
@@ -XXX,XX +XXX,XX @@ static void do_tb_invalidate_check(void *p, uint32_t hash, void *userp)
225
TranslationBlock *tb = p;
226
target_ulong addr = *(target_ulong *)userp;
227
228
- if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) {
229
+ if (!(addr + TARGET_PAGE_SIZE <= tb_pc(tb) ||
230
+ addr >= tb_pc(tb) + tb->size)) {
231
printf("ERROR invalidate: address=" TARGET_FMT_lx
232
- " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size);
233
+ " PC=%08lx size=%04x\n", addr, (long)tb_pc(tb), tb->size);
234
}
235
}
236
237
@@ -XXX,XX +XXX,XX @@ static void do_tb_page_check(void *p, uint32_t hash, void *userp)
238
TranslationBlock *tb = p;
239
int flags1, flags2;
240
241
- flags1 = page_get_flags(tb->pc);
242
- flags2 = page_get_flags(tb->pc + tb->size - 1);
243
+ flags1 = page_get_flags(tb_pc(tb));
244
+ flags2 = page_get_flags(tb_pc(tb) + tb->size - 1);
245
if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
246
printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
247
- (long)tb->pc, tb->size, flags1, flags2);
248
+ (long)tb_pc(tb), tb->size, flags1, flags2);
249
}
250
}
251
252
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
253
254
/* remove the TB from the hash list */
255
phys_pc = tb->page_addr[0];
256
- h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
257
+ h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, orig_cflags,
258
tb->trace_vcpu_dstate);
259
if (!qht_remove(&tb_ctx.htable, tb, h)) {
260
return;
261
@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
262
}
263
264
/* add in the hash table */
265
- h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags,
266
+ h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, tb->cflags,
267
tb->trace_vcpu_dstate);
268
qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
269
270
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
271
tcg_ctx->cpu = NULL;
272
max_insns = tb->icount;
273
274
- trace_translate_block(tb, tb->pc, tb->tc.ptr);
275
+ trace_translate_block(tb, pc, tb->tc.ptr);
276
277
/* generate machine code */
278
tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
279
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
280
ti = profile_getclock();
281
#endif
282
283
- gen_code_size = tcg_gen_code(tcg_ctx, tb);
284
+ gen_code_size = tcg_gen_code(tcg_ctx, tb, pc);
285
if (unlikely(gen_code_size < 0)) {
286
error_return:
287
switch (gen_code_size) {
288
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
289
290
#ifdef DEBUG_DISAS
291
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
292
- qemu_log_in_addr_range(tb->pc)) {
293
+ qemu_log_in_addr_range(pc)) {
294
FILE *logfile = qemu_log_trylock();
295
if (logfile) {
296
int code_size, data_size;
297
@@ -XXX,XX +XXX,XX @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
298
*/
299
cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
300
301
- qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
302
- "cpu_io_recompile: rewound execution of TB to "
303
- TARGET_FMT_lx "\n", tb->pc);
304
+ if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
305
+ target_ulong pc = log_pc(cpu, tb);
306
+ if (qemu_log_in_addr_range(pc)) {
307
+ qemu_log("cpu_io_recompile: rewound execution of TB to "
308
+ TARGET_FMT_lx "\n", pc);
309
+ }
310
+ }
311
312
cpu_loop_exit_noexc(cpu);
313
}
314
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
315
index XXXXXXX..XXXXXXX 100644
316
--- a/target/arm/cpu.c
317
+++ b/target/arm/cpu.c
318
@@ -XXX,XX +XXX,XX @@ void arm_cpu_synchronize_from_tb(CPUState *cs,
319
* never possible for an AArch64 TB to chain to an AArch32 TB.
320
*/
321
if (is_a64(env)) {
322
- env->pc = tb->pc;
323
+ env->pc = tb_pc(tb);
324
} else {
325
- env->regs[15] = tb->pc;
326
+ env->regs[15] = tb_pc(tb);
327
}
328
}
329
#endif /* CONFIG_TCG */
330
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
331
index XXXXXXX..XXXXXXX 100644
332
--- a/target/avr/cpu.c
333
+++ b/target/avr/cpu.c
334
@@ -XXX,XX +XXX,XX @@ static void avr_cpu_synchronize_from_tb(CPUState *cs,
335
AVRCPU *cpu = AVR_CPU(cs);
336
CPUAVRState *env = &cpu->env;
337
338
- env->pc_w = tb->pc / 2; /* internally PC points to words */
339
+ env->pc_w = tb_pc(tb) / 2; /* internally PC points to words */
340
}
341
342
static void avr_cpu_reset(DeviceState *ds)
343
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
344
index XXXXXXX..XXXXXXX 100644
345
--- a/target/hexagon/cpu.c
346
+++ b/target/hexagon/cpu.c
347
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_synchronize_from_tb(CPUState *cs,
348
{
349
HexagonCPU *cpu = HEXAGON_CPU(cs);
350
CPUHexagonState *env = &cpu->env;
351
- env->gpr[HEX_REG_PC] = tb->pc;
352
+ env->gpr[HEX_REG_PC] = tb_pc(tb);
353
}
354
355
static bool hexagon_cpu_has_work(CPUState *cs)
356
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
357
index XXXXXXX..XXXXXXX 100644
358
--- a/target/hppa/cpu.c
359
+++ b/target/hppa/cpu.c
360
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_synchronize_from_tb(CPUState *cs,
361
HPPACPU *cpu = HPPA_CPU(cs);
362
363
#ifdef CONFIG_USER_ONLY
364
- cpu->env.iaoq_f = tb->pc;
365
+ cpu->env.iaoq_f = tb_pc(tb);
366
cpu->env.iaoq_b = tb->cs_base;
367
#else
368
/* Recover the IAOQ values from the GVA + PRIV. */
369
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_synchronize_from_tb(CPUState *cs,
370
int32_t diff = cs_base;
371
372
cpu->env.iasq_f = iasq_f;
373
- cpu->env.iaoq_f = (tb->pc & ~iasq_f) + priv;
374
+ cpu->env.iaoq_f = (tb_pc(tb) & ~iasq_f) + priv;
375
if (diff) {
376
cpu->env.iaoq_b = cpu->env.iaoq_f + diff;
377
}
378
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
379
index XXXXXXX..XXXXXXX 100644
380
--- a/target/i386/tcg/tcg-cpu.c
381
+++ b/target/i386/tcg/tcg-cpu.c
382
@@ -XXX,XX +XXX,XX @@ static void x86_cpu_synchronize_from_tb(CPUState *cs,
383
{
384
X86CPU *cpu = X86_CPU(cs);
385
386
- cpu->env.eip = tb->pc - tb->cs_base;
387
+ cpu->env.eip = tb_pc(tb) - tb->cs_base;
388
}
389
390
#ifndef CONFIG_USER_ONLY
391
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
392
index XXXXXXX..XXXXXXX 100644
393
--- a/target/loongarch/cpu.c
394
+++ b/target/loongarch/cpu.c
395
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_synchronize_from_tb(CPUState *cs,
396
LoongArchCPU *cpu = LOONGARCH_CPU(cs);
397
CPULoongArchState *env = &cpu->env;
398
399
- env->pc = tb->pc;
400
+ env->pc = tb_pc(tb);
401
}
402
#endif /* CONFIG_TCG */
403
404
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
405
index XXXXXXX..XXXXXXX 100644
406
--- a/target/microblaze/cpu.c
407
+++ b/target/microblaze/cpu.c
408
@@ -XXX,XX +XXX,XX @@ static void mb_cpu_synchronize_from_tb(CPUState *cs,
409
{
410
MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
411
412
- cpu->env.pc = tb->pc;
413
+ cpu->env.pc = tb_pc(tb);
414
cpu->env.iflags = tb->flags & IFLAGS_TB_MASK;
415
}
416
417
diff --git a/target/mips/tcg/exception.c b/target/mips/tcg/exception.c
418
index XXXXXXX..XXXXXXX 100644
419
--- a/target/mips/tcg/exception.c
420
+++ b/target/mips/tcg/exception.c
421
@@ -XXX,XX +XXX,XX @@ void mips_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb)
422
MIPSCPU *cpu = MIPS_CPU(cs);
423
CPUMIPSState *env = &cpu->env;
424
425
- env->active_tc.PC = tb->pc;
426
+ env->active_tc.PC = tb_pc(tb);
427
env->hflags &= ~MIPS_HFLAG_BMASK;
428
env->hflags |= tb->flags & MIPS_HFLAG_BMASK;
429
}
430
diff --git a/target/mips/tcg/sysemu/special_helper.c b/target/mips/tcg/sysemu/special_helper.c
431
index XXXXXXX..XXXXXXX 100644
432
--- a/target/mips/tcg/sysemu/special_helper.c
433
+++ b/target/mips/tcg/sysemu/special_helper.c
434
@@ -XXX,XX +XXX,XX @@ bool mips_io_recompile_replay_branch(CPUState *cs, const TranslationBlock *tb)
435
CPUMIPSState *env = &cpu->env;
436
437
if ((env->hflags & MIPS_HFLAG_BMASK) != 0
438
- && env->active_tc.PC != tb->pc) {
439
+ && env->active_tc.PC != tb_pc(tb)) {
440
env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4);
441
env->hflags &= ~MIPS_HFLAG_BMASK;
442
return true;
443
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
444
index XXXXXXX..XXXXXXX 100644
445
--- a/target/openrisc/cpu.c
446
+++ b/target/openrisc/cpu.c
447
@@ -XXX,XX +XXX,XX @@ static void openrisc_cpu_synchronize_from_tb(CPUState *cs,
448
{
449
OpenRISCCPU *cpu = OPENRISC_CPU(cs);
450
451
- cpu->env.pc = tb->pc;
452
+ cpu->env.pc = tb_pc(tb);
453
}
454
455
456
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
457
index XXXXXXX..XXXXXXX 100644
458
--- a/target/riscv/cpu.c
459
+++ b/target/riscv/cpu.c
460
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_synchronize_from_tb(CPUState *cs,
461
RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
462
463
if (xl == MXL_RV32) {
464
- env->pc = (int32_t)tb->pc;
465
+ env->pc = (int32_t)tb_pc(tb);
466
} else {
467
- env->pc = tb->pc;
468
+ env->pc = tb_pc(tb);
469
}
470
}
471
472
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
473
index XXXXXXX..XXXXXXX 100644
474
--- a/target/rx/cpu.c
475
+++ b/target/rx/cpu.c
476
@@ -XXX,XX +XXX,XX @@ static void rx_cpu_synchronize_from_tb(CPUState *cs,
477
{
478
RXCPU *cpu = RX_CPU(cs);
479
480
- cpu->env.pc = tb->pc;
481
+ cpu->env.pc = tb_pc(tb);
482
}
483
484
static bool rx_cpu_has_work(CPUState *cs)
485
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
486
index XXXXXXX..XXXXXXX 100644
487
--- a/target/sh4/cpu.c
488
+++ b/target/sh4/cpu.c
489
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
490
{
491
SuperHCPU *cpu = SUPERH_CPU(cs);
492
493
- cpu->env.pc = tb->pc;
494
+ cpu->env.pc = tb_pc(tb);
495
cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
496
}
497
498
@@ -XXX,XX +XXX,XX @@ static bool superh_io_recompile_replay_branch(CPUState *cs,
499
CPUSH4State *env = &cpu->env;
500
501
if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
502
- && env->pc != tb->pc) {
503
+ && env->pc != tb_pc(tb)) {
504
env->pc -= 2;
505
env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
506
return true;
507
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
508
index XXXXXXX..XXXXXXX 100644
509
--- a/target/sparc/cpu.c
510
+++ b/target/sparc/cpu.c
511
@@ -XXX,XX +XXX,XX @@ static void sparc_cpu_synchronize_from_tb(CPUState *cs,
512
{
513
SPARCCPU *cpu = SPARC_CPU(cs);
514
515
- cpu->env.pc = tb->pc;
516
+ cpu->env.pc = tb_pc(tb);
517
cpu->env.npc = tb->cs_base;
518
}
519
520
diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
521
index XXXXXXX..XXXXXXX 100644
522
--- a/target/tricore/cpu.c
523
+++ b/target/tricore/cpu.c
524
@@ -XXX,XX +XXX,XX @@ static void tricore_cpu_synchronize_from_tb(CPUState *cs,
525
TriCoreCPU *cpu = TRICORE_CPU(cs);
526
CPUTriCoreState *env = &cpu->env;
527
528
- env->PC = tb->pc;
529
+ env->PC = tb_pc(tb);
530
}
531
532
static void tricore_cpu_reset(DeviceState *dev)
23
diff --git a/tcg/tcg.c b/tcg/tcg.c
533
diff --git a/tcg/tcg.c b/tcg/tcg.c
24
index XXXXXXX..XXXXXXX 100644
534
index XXXXXXX..XXXXXXX 100644
25
--- a/tcg/tcg.c
535
--- a/tcg/tcg.c
26
+++ b/tcg/tcg.c
536
+++ b/tcg/tcg.c
27
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
537
@@ -XXX,XX +XXX,XX @@ int64_t tcg_cpu_exec_time(void)
28
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
29
TCGReg base, intptr_t ofs);
30
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
31
-static int tcg_target_const_match(tcg_target_long val, TCGType type,
32
- const TCGArgConstraint *arg_ct);
33
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
34
#ifdef TCG_TARGET_NEED_LDST_LABELS
35
static int tcg_out_ldst_finalize(TCGContext *s);
36
#endif
538
#endif
37
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
539
38
ts = arg_temp(arg);
540
39
541
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
40
if (ts->val_type == TEMP_VAL_CONST
542
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
41
- && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
543
{
42
+ && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
544
#ifdef CONFIG_PROFILER
43
/* constant is OK for instruction */
545
TCGProfile *prof = &s->prof;
44
const_args[i] = 1;
546
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
45
new_args[i] = ts->val;
547
46
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
548
#ifdef DEBUG_DISAS
47
index XXXXXXX..XXXXXXX 100644
549
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
48
--- a/tcg/aarch64/tcg-target.c.inc
550
- && qemu_log_in_addr_range(tb->pc))) {
49
+++ b/tcg/aarch64/tcg-target.c.inc
551
+ && qemu_log_in_addr_range(pc_start))) {
50
@@ -XXX,XX +XXX,XX @@ static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
552
FILE *logfile = qemu_log_trylock();
51
}
553
if (logfile) {
52
}
554
fprintf(logfile, "OP:\n");
53
555
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
54
-static int tcg_target_const_match(tcg_target_long val, TCGType type,
556
if (s->nb_indirects > 0) {
55
- const TCGArgConstraint *arg_ct)
557
#ifdef DEBUG_DISAS
56
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
558
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
57
{
559
- && qemu_log_in_addr_range(tb->pc))) {
58
- int ct = arg_ct->ct;
560
+ && qemu_log_in_addr_range(pc_start))) {
59
-
561
FILE *logfile = qemu_log_trylock();
60
if (ct & TCG_CT_CONST) {
562
if (logfile) {
61
return 1;
563
fprintf(logfile, "OP before indirect lowering:\n");
62
}
564
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
63
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
565
64
index XXXXXXX..XXXXXXX 100644
566
#ifdef DEBUG_DISAS
65
--- a/tcg/arm/tcg-target.c.inc
567
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
66
+++ b/tcg/arm/tcg-target.c.inc
568
- && qemu_log_in_addr_range(tb->pc))) {
67
@@ -XXX,XX +XXX,XX @@ static inline int check_fit_imm(uint32_t imm)
569
+ && qemu_log_in_addr_range(pc_start))) {
68
* mov operand2: values represented with x << (2 * y), x < 0x100
570
FILE *logfile = qemu_log_trylock();
69
* add, sub, eor...: ditto
571
if (logfile) {
70
*/
572
fprintf(logfile, "OP after optimization and liveness analysis:\n");
71
-static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
72
- const TCGArgConstraint *arg_ct)
73
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
74
{
75
- int ct;
76
- ct = arg_ct->ct;
77
if (ct & TCG_CT_CONST) {
78
return 1;
79
} else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
80
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
81
index XXXXXXX..XXXXXXX 100644
82
--- a/tcg/i386/tcg-target.c.inc
83
+++ b/tcg/i386/tcg-target.c.inc
84
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
85
}
86
87
/* test if a constant matches the constraint */
88
-static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
89
- const TCGArgConstraint *arg_ct)
90
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
91
{
92
- int ct = arg_ct->ct;
93
if (ct & TCG_CT_CONST) {
94
return 1;
95
}
96
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
97
index XXXXXXX..XXXXXXX 100644
98
--- a/tcg/mips/tcg-target.c.inc
99
+++ b/tcg/mips/tcg-target.c.inc
100
@@ -XXX,XX +XXX,XX @@ static inline bool is_p2m1(tcg_target_long val)
101
}
102
103
/* test if a constant matches the constraint */
104
-static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
105
- const TCGArgConstraint *arg_ct)
106
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
107
{
108
- int ct;
109
- ct = arg_ct->ct;
110
if (ct & TCG_CT_CONST) {
111
return 1;
112
} else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
113
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
114
index XXXXXXX..XXXXXXX 100644
115
--- a/tcg/ppc/tcg-target.c.inc
116
+++ b/tcg/ppc/tcg-target.c.inc
117
@@ -XXX,XX +XXX,XX @@ static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
118
}
119
120
/* test if a constant matches the constraint */
121
-static int tcg_target_const_match(tcg_target_long val, TCGType type,
122
- const TCGArgConstraint *arg_ct)
123
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
124
{
125
- int ct = arg_ct->ct;
126
if (ct & TCG_CT_CONST) {
127
return 1;
128
}
129
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
130
index XXXXXXX..XXXXXXX 100644
131
--- a/tcg/riscv/tcg-target.c.inc
132
+++ b/tcg/riscv/tcg-target.c.inc
133
@@ -XXX,XX +XXX,XX @@ static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
134
}
135
136
/* test if a constant matches the constraint */
137
-static int tcg_target_const_match(tcg_target_long val, TCGType type,
138
- const TCGArgConstraint *arg_ct)
139
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
140
{
141
- int ct = arg_ct->ct;
142
if (ct & TCG_CT_CONST) {
143
return 1;
144
}
145
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
146
index XXXXXXX..XXXXXXX 100644
147
--- a/tcg/s390/tcg-target.c.inc
148
+++ b/tcg/s390/tcg-target.c.inc
149
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *src_rw, int type,
150
}
151
152
/* Test if a constant matches the constraint. */
153
-static int tcg_target_const_match(tcg_target_long val, TCGType type,
154
- const TCGArgConstraint *arg_ct)
155
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
156
{
157
- int ct = arg_ct->ct;
158
-
159
if (ct & TCG_CT_CONST) {
160
return 1;
161
}
162
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
163
index XXXXXXX..XXXXXXX 100644
164
--- a/tcg/sparc/tcg-target.c.inc
165
+++ b/tcg/sparc/tcg-target.c.inc
166
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *src_rw, int type,
167
}
168
169
/* test if a constant matches the constraint */
170
-static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
171
- const TCGArgConstraint *arg_ct)
172
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
173
{
174
- int ct = arg_ct->ct;
175
-
176
if (ct & TCG_CT_CONST) {
177
return 1;
178
}
179
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
180
index XXXXXXX..XXXXXXX 100644
181
--- a/tcg/tci/tcg-target.c.inc
182
+++ b/tcg/tci/tcg-target.c.inc
183
@@ -XXX,XX +XXX,XX @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
184
}
185
186
/* Test if a constant matches the constraint. */
187
-static int tcg_target_const_match(tcg_target_long val, TCGType type,
188
- const TCGArgConstraint *arg_ct)
189
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
190
{
191
- /* No need to return 0 or 1, 0 or != 0 is good enough. */
192
- return arg_ct->ct & TCG_CT_CONST;
193
+ return ct & TCG_CT_CONST;
194
}
195
196
static void tcg_target_init(TCGContext *s)
197
--
2.34.1

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
1
Prepare for targets to be able to produce TBs that can
2
run in more than one virtual context.
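
The jump-cache changes below pair each cached pc with its tb pointer using
a store_release/load_acquire protocol. As a rough, self-contained
illustration of that ordering only (plain C11 atomics rather than QEMU's
qatomic_* helpers; the structure and function names are made up for the
example and are not part of this patch):

    #include <stdatomic.h>
    #include <stdint.h>

    struct jc_entry {
        uint64_t pc;            /* payload: written before publication */
        _Atomic(void *) tb;     /* publication pointer */
    };

    /* Writer: fill in pc first, then publish tb with release ordering. */
    static void jc_publish(struct jc_entry *e, void *tb, uint64_t pc)
    {
        e->pc = pc;
        atomic_store_explicit(&e->tb, tb, memory_order_release);
    }

    /* Reader: acquire tb first; only then is the matching pc visible. */
    static void *jc_lookup(struct jc_entry *e, uint64_t *pc_out)
    {
        void *tb = atomic_load_explicit(&e->tb, memory_order_acquire);
        if (tb) {
            *pc_out = e->pc;
        }
        return tb;
    }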
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
6
---
4
tcg/arm/tcg-target.c.inc | 52 +++++++++++++++++++++++++++++++++++-----
7
accel/tcg/internal.h | 4 +++
5
1 file changed, 46 insertions(+), 6 deletions(-)
8
accel/tcg/tb-jmp-cache.h | 41 +++++++++++++++++++++++++
9
include/exec/cpu-defs.h | 3 ++
10
include/exec/exec-all.h | 32 ++++++++++++++++++--
11
accel/tcg/cpu-exec.c | 16 ++++++----
12
accel/tcg/translate-all.c | 64 ++++++++++++++++++++++++++-------------
13
6 files changed, 131 insertions(+), 29 deletions(-)
6
14
7
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
15
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
8
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/arm/tcg-target.c.inc
17
--- a/accel/tcg/internal.h
10
+++ b/tcg/arm/tcg-target.c.inc
18
+++ b/accel/tcg/internal.h
11
@@ -XXX,XX +XXX,XX @@ typedef enum {
19
@@ -XXX,XX +XXX,XX @@ void tb_htable_init(void);
12
/* Otherwise the assembler uses mov r0,r0 */
20
/* Return the current PC from CPU, which may be cached in TB. */
13
INSN_NOP_v4 = (COND_AL << 28) | ARITH_MOV,
21
static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
14
22
{
15
+ INSN_VORR = 0xf2200110,
23
+#if TARGET_TB_PCREL
16
+
24
+ return cpu->cc->get_pc(cpu);
17
INSN_VLD1 = 0xf4200000, /* VLD1 (multiple single elements) */
25
+#else
18
INSN_VST1 = 0xf4000000, /* VST1 (multiple single elements) */
26
return tb_pc(tb);
19
} ARMInsn;
27
+#endif
20
@@ -XXX,XX +XXX,XX @@ static uint32_t encode_vd(TCGReg rd)
28
}
21
return (extract32(rd, 3, 1) << 22) | (extract32(rd, 0, 3) << 13);
29
22
}
30
#endif /* ACCEL_TCG_INTERNAL_H */
23
31
diff --git a/accel/tcg/tb-jmp-cache.h b/accel/tcg/tb-jmp-cache.h
24
+static uint32_t encode_vn(TCGReg rn)
32
index XXXXXXX..XXXXXXX 100644
33
--- a/accel/tcg/tb-jmp-cache.h
34
+++ b/accel/tcg/tb-jmp-cache.h
35
@@ -XXX,XX +XXX,XX @@
36
37
/*
38
* Accessed in parallel; all accesses to 'tb' must be atomic.
39
+ * For TARGET_TB_PCREL, accesses to 'pc' must be protected by
40
+ * a load_acquire/store_release to 'tb'.
41
*/
42
struct CPUJumpCache {
43
struct {
44
TranslationBlock *tb;
45
+#if TARGET_TB_PCREL
46
+ target_ulong pc;
47
+#endif
48
} array[TB_JMP_CACHE_SIZE];
49
};
50
51
+static inline TranslationBlock *
52
+tb_jmp_cache_get_tb(CPUJumpCache *jc, uint32_t hash)
25
+{
53
+{
26
+ tcg_debug_assert(rn >= TCG_REG_Q0);
54
+#if TARGET_TB_PCREL
27
+ return (extract32(rn, 3, 1) << 7) | (extract32(rn, 0, 3) << 17);
55
+ /* Use acquire to ensure current load of pc from jc. */
56
+ return qatomic_load_acquire(&jc->array[hash].tb);
57
+#else
58
+ /* Use rcu_read to ensure current load of pc from *tb. */
59
+ return qatomic_rcu_read(&jc->array[hash].tb);
60
+#endif
28
+}
61
+}
29
+
62
+
30
+static uint32_t encode_vm(TCGReg rm)
63
+static inline target_ulong
64
+tb_jmp_cache_get_pc(CPUJumpCache *jc, uint32_t hash, TranslationBlock *tb)
31
+{
65
+{
32
+ tcg_debug_assert(rm >= TCG_REG_Q0);
66
+#if TARGET_TB_PCREL
33
+ return (extract32(rm, 3, 1) << 5) | (extract32(rm, 0, 3) << 1);
67
+ return jc->array[hash].pc;
68
+#else
69
+ return tb_pc(tb);
70
+#endif
34
+}
71
+}
35
+
72
+
36
+static void tcg_out_vreg3(TCGContext *s, ARMInsn insn, int q, int vece,
73
+static inline void
37
+ TCGReg d, TCGReg n, TCGReg m)
74
+tb_jmp_cache_set(CPUJumpCache *jc, uint32_t hash,
75
+ TranslationBlock *tb, target_ulong pc)
38
+{
76
+{
39
+ tcg_out32(s, insn | (vece << 20) | (q << 6) |
77
+#if TARGET_TB_PCREL
40
+ encode_vd(d) | encode_vn(n) | encode_vm(m));
78
+ jc->array[hash].pc = pc;
79
+ /* Use store_release on tb to ensure pc is written first. */
80
+ qatomic_store_release(&jc->array[hash].tb, tb);
81
+#else
82
+ /* Use the pc value already stored in tb->pc. */
83
+ qatomic_set(&jc->array[hash].tb, tb);
84
+#endif
41
+}
85
+}
42
+
86
+
43
static void tcg_out_vldst(TCGContext *s, ARMInsn insn,
87
#endif /* ACCEL_TCG_TB_JMP_CACHE_H */
44
TCGReg rd, TCGReg rn, int offset)
88
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
45
{
89
index XXXXXXX..XXXXXXX 100644
46
@@ -XXX,XX +XXX,XX @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
90
--- a/include/exec/cpu-defs.h
47
return false;
91
+++ b/include/exec/cpu-defs.h
48
}
92
@@ -XXX,XX +XXX,XX @@
49
93
# error TARGET_PAGE_BITS must be defined in cpu-param.h
50
-static inline bool tcg_out_mov(TCGContext *s, TCGType type,
94
# endif
51
- TCGReg ret, TCGReg arg)
95
#endif
52
+static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
96
+#ifndef TARGET_TB_PCREL
53
{
97
+# define TARGET_TB_PCREL 0
54
- tcg_out_mov_reg(s, COND_AL, ret, arg);
98
+#endif
55
- return true;
99
56
+ if (ret == arg) {
100
#define TARGET_LONG_SIZE (TARGET_LONG_BITS / 8)
57
+ return true;
101
102
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
103
index XXXXXXX..XXXXXXX 100644
104
--- a/include/exec/exec-all.h
105
+++ b/include/exec/exec-all.h
106
@@ -XXX,XX +XXX,XX @@ struct tb_tc {
107
};
108
109
struct TranslationBlock {
110
- target_ulong pc; /* simulated PC corresponding to this block (EIP + CS base) */
111
- target_ulong cs_base; /* CS base for this block */
112
+#if !TARGET_TB_PCREL
113
+ /*
114
+ * Guest PC corresponding to this block. This must be the true
115
+ * virtual address. Therefore e.g. x86 stores EIP + CS_BASE, and
116
+ * targets like Arm, MIPS, HP-PA, which reuse low bits for ISA or
117
+ * privilege, must store those bits elsewhere.
118
+ *
119
+ * If TARGET_TB_PCREL, the opcodes for the TranslationBlock are
120
+ * written such that the TB is associated only with the physical
121
+ * page and may be run in any virtual address context. In this case,
122
+ * PC must always be taken from ENV in a target-specific manner.
123
+ * Unwind information is taken as offsets from the page, to be
124
+ * deposited into the "current" PC.
125
+ */
126
+ target_ulong pc;
127
+#endif
128
+
129
+ /*
130
+ * Target-specific data associated with the TranslationBlock, e.g.:
131
+ * x86: the original user, the Code Segment virtual base,
132
+ * arm: an extension of tb->flags,
133
+ * s390x: instruction data for EXECUTE,
134
+ * sparc: the next pc of the instruction queue (for delay slots).
135
+ */
136
+ target_ulong cs_base;
137
+
138
uint32_t flags; /* flags defining in which context the code was generated */
139
uint32_t cflags; /* compile flags */
140
141
@@ -XXX,XX +XXX,XX @@ struct TranslationBlock {
142
/* Hide the read to avoid ifdefs for TARGET_TB_PCREL. */
143
static inline target_ulong tb_pc(const TranslationBlock *tb)
144
{
145
+#if TARGET_TB_PCREL
146
+ qemu_build_not_reached();
147
+#else
148
return tb->pc;
149
+#endif
150
}
151
152
/* Hide the qatomic_read to make code a little easier on the eyes */
153
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
154
index XXXXXXX..XXXXXXX 100644
155
--- a/accel/tcg/cpu-exec.c
156
+++ b/accel/tcg/cpu-exec.c
157
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
158
const TranslationBlock *tb = p;
159
const struct tb_desc *desc = d;
160
161
- if (tb_pc(tb) == desc->pc &&
162
+ if ((TARGET_TB_PCREL || tb_pc(tb) == desc->pc) &&
163
tb->page_addr[0] == desc->page_addr0 &&
164
tb->cs_base == desc->cs_base &&
165
tb->flags == desc->flags &&
166
@@ -XXX,XX +XXX,XX @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
167
return NULL;
168
}
169
desc.page_addr0 = phys_pc;
170
- h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
171
+ h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : pc),
172
+ flags, cflags, *cpu->trace_dstate);
173
return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
174
}
175
176
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
177
uint32_t flags, uint32_t cflags)
178
{
179
TranslationBlock *tb;
180
+ CPUJumpCache *jc;
181
uint32_t hash;
182
183
/* we should never be trying to look up an INVALID tb */
184
tcg_debug_assert(!(cflags & CF_INVALID));
185
186
hash = tb_jmp_cache_hash_func(pc);
187
- tb = qatomic_rcu_read(&cpu->tb_jmp_cache->array[hash].tb);
188
+ jc = cpu->tb_jmp_cache;
189
+ tb = tb_jmp_cache_get_tb(jc, hash);
190
191
if (likely(tb &&
192
- tb->pc == pc &&
193
+ tb_jmp_cache_get_pc(jc, hash, tb) == pc &&
194
tb->cs_base == cs_base &&
195
tb->flags == flags &&
196
tb->trace_vcpu_dstate == *cpu->trace_dstate &&
197
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
198
if (tb == NULL) {
199
return NULL;
200
}
201
- qatomic_set(&cpu->tb_jmp_cache->array[hash].tb, tb);
202
+ tb_jmp_cache_set(jc, hash, tb, pc);
203
return tb;
204
}
205
206
@@ -XXX,XX +XXX,XX @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
207
if (cc->tcg_ops->synchronize_from_tb) {
208
cc->tcg_ops->synchronize_from_tb(cpu, last_tb);
209
} else {
210
+ assert(!TARGET_TB_PCREL);
211
assert(cc->set_pc);
212
cc->set_pc(cpu, tb_pc(last_tb));
213
}
214
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
215
* for the fast lookup
216
*/
217
h = tb_jmp_cache_hash_func(pc);
218
- qatomic_set(&cpu->tb_jmp_cache->array[h].tb, tb);
219
+ tb_jmp_cache_set(cpu->tb_jmp_cache, h, tb, pc);
220
}
221
222
#ifndef CONFIG_USER_ONLY
223
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
224
index XXXXXXX..XXXXXXX 100644
225
--- a/accel/tcg/translate-all.c
226
+++ b/accel/tcg/translate-all.c
227
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
228
229
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
230
if (i == 0) {
231
- prev = (j == 0 ? tb_pc(tb) : 0);
232
+ prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
233
} else {
234
prev = tcg_ctx->gen_insn_data[i - 1][j];
235
}
236
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
237
static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
238
uintptr_t searched_pc, bool reset_icount)
239
{
240
- target_ulong data[TARGET_INSN_START_WORDS] = { tb_pc(tb) };
241
+ target_ulong data[TARGET_INSN_START_WORDS];
242
uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
243
CPUArchState *env = cpu->env_ptr;
244
const uint8_t *p = tb->tc.ptr + tb->tc.size;
245
@@ -XXX,XX +XXX,XX @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
246
return -1;
247
}
248
249
+ memset(data, 0, sizeof(data));
250
+ if (!TARGET_TB_PCREL) {
251
+ data[0] = tb_pc(tb);
58
+ }
252
+ }
59
+ switch (type) {
253
+
60
+ case TCG_TYPE_I32:
254
/* Reconstruct the stored insn data while looking for the point at
61
+ if (ret < TCG_REG_Q0 && arg < TCG_REG_Q0) {
255
which the end of the insn exceeds the searched_pc. */
62
+ tcg_out_mov_reg(s, COND_AL, ret, arg);
256
for (i = 0; i < num_insns; ++i) {
63
+ return true;
257
@@ -XXX,XX +XXX,XX @@ static bool tb_cmp(const void *ap, const void *bp)
258
const TranslationBlock *a = ap;
259
const TranslationBlock *b = bp;
260
261
- return tb_pc(a) == tb_pc(b) &&
262
- a->cs_base == b->cs_base &&
263
- a->flags == b->flags &&
264
- (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
265
- a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
266
- a->page_addr[0] == b->page_addr[0] &&
267
- a->page_addr[1] == b->page_addr[1];
268
+ return ((TARGET_TB_PCREL || tb_pc(a) == tb_pc(b)) &&
269
+ a->cs_base == b->cs_base &&
270
+ a->flags == b->flags &&
271
+ (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
272
+ a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
273
+ a->page_addr[0] == b->page_addr[0] &&
274
+ a->page_addr[1] == b->page_addr[1]);
275
}
276
277
void tb_htable_init(void)
278
@@ -XXX,XX +XXX,XX @@ static inline void tb_jmp_unlink(TranslationBlock *dest)
279
qemu_spin_unlock(&dest->jmp_lock);
280
}
281
282
+static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
283
+{
284
+ CPUState *cpu;
285
+
286
+ if (TARGET_TB_PCREL) {
287
+ /* A TB may be at any virtual address */
288
+ CPU_FOREACH(cpu) {
289
+ tcg_flush_jmp_cache(cpu);
64
+ }
290
+ }
65
+ return false;
291
+ } else {
66
+
292
+ uint32_t h = tb_jmp_cache_hash_func(tb_pc(tb));
67
+ case TCG_TYPE_V64:
293
+
68
+ case TCG_TYPE_V128:
294
+ CPU_FOREACH(cpu) {
69
+ /* "VMOV D,N" is an alias for "VORR D,N,N". */
295
+ CPUJumpCache *jc = cpu->tb_jmp_cache;
70
+ tcg_out_vreg3(s, INSN_VORR, type - TCG_TYPE_V64, 0, ret, arg, arg);
296
+
71
+ return true;
297
+ if (qatomic_read(&jc->array[h].tb) == tb) {
72
+
298
+ qatomic_set(&jc->array[h].tb, NULL);
73
+ default:
299
+ }
74
+ g_assert_not_reached();
300
+ }
75
+ }
301
+ }
76
}
302
+}
77
303
+
78
-static inline void tcg_out_movi(TCGContext *s, TCGType type,
304
/*
79
- TCGReg ret, tcg_target_long arg)
305
* In user-mode, call with mmap_lock held.
80
+static void tcg_out_movi(TCGContext *s, TCGType type,
306
* In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
81
+ TCGReg ret, tcg_target_long arg)
307
@@ -XXX,XX +XXX,XX @@ static inline void tb_jmp_unlink(TranslationBlock *dest)
82
{
308
*/
83
+ tcg_debug_assert(type == TCG_TYPE_I32);
309
static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
84
+ tcg_debug_assert(ret < TCG_REG_Q0);
310
{
85
tcg_out_movi32(s, COND_AL, ret, arg);
311
- CPUState *cpu;
86
}
312
PageDesc *p;
87
313
uint32_t h;
314
tb_page_addr_t phys_pc;
315
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
316
317
/* remove the TB from the hash list */
318
phys_pc = tb->page_addr[0];
319
- h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, orig_cflags,
320
- tb->trace_vcpu_dstate);
321
+ h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
322
+ tb->flags, orig_cflags, tb->trace_vcpu_dstate);
323
if (!qht_remove(&tb_ctx.htable, tb, h)) {
324
return;
325
}
326
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
327
}
328
329
/* remove the TB from the hash list */
330
- h = tb_jmp_cache_hash_func(tb->pc);
331
- CPU_FOREACH(cpu) {
332
- CPUJumpCache *jc = cpu->tb_jmp_cache;
333
- if (qatomic_read(&jc->array[h].tb) == tb) {
334
- qatomic_set(&jc->array[h].tb, NULL);
335
- }
336
- }
337
+ tb_jmp_cache_inval_tb(tb);
338
339
/* suppress this TB from the two jump lists */
340
tb_remove_from_jmp_list(tb, 0);
341
@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
342
}
343
344
/* add in the hash table */
345
- h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, tb->cflags,
346
- tb->trace_vcpu_dstate);
347
+ h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
348
+ tb->flags, tb->cflags, tb->trace_vcpu_dstate);
349
qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
350
351
/* remove TB from the page(s) if we couldn't insert it */
352
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
353
354
gen_code_buf = tcg_ctx->code_gen_ptr;
355
tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
356
+#if !TARGET_TB_PCREL
357
tb->pc = pc;
358
+#endif
359
tb->cs_base = cs_base;
360
tb->flags = flags;
361
tb->cflags = cflags;
88
--
2.34.1

Most of dupi is copied from tcg/aarch64, which has the same
1
From: Leandro Lupori <leandro.lupori@eldorado.org.br>
2
encoding for AdvSimdExpandImm.
3
2
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
PowerPC64 processors handle direct branches better than indirect
4
ones, resulting in fewer stalled cycles and branch misses.
5
6
However, PPC's tb_target_set_jmp_target() was only using direct
7
branches for 16-bit jumps, while PowerPC64's unconditional branch
8
instructions are able to handle displacements of up to 26 bits.
9
To take advantage of this, now jumps whose displacements fit in
10
between 17 and 26 bits are also converted to direct branches.
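
As a rough illustration of the two ranges involved (not code from this
patch; the series itself relies on tcg/ppc's existing in_range_b() check,
and these helper names are invented for the example), the displacement
classes look like this:

    #include <stdbool.h>
    #include <stdint.h>

    /* 16-bit case: the offset is reachable with a single ADDI (simm16). */
    static bool fits_simm16(int64_t disp)
    {
        return disp == (int16_t)disp;
    }

    /* 26-bit case: reachable with a direct I-form branch.  LI is a signed
     * 24-bit field scaled by 4, i.e. +/-32 MiB, 4-byte aligned. */
    static bool fits_direct_branch(int64_t disp)
    {
        return disp >= -0x2000000 && disp <= 0x1fffffc && (disp & 3) == 0;
    }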
11
12
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
13
Signed-off-by: Leandro Lupori <leandro.lupori@eldorado.org.br>
14
[rth: Expanded some commentary.]
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
16
---
7
tcg/arm/tcg-target.c.inc | 283 +++++++++++++++++++++++++++++++++++++--
17
tcg/ppc/tcg-target.c.inc | 119 +++++++++++++++++++++++++++++----------
8
1 file changed, 275 insertions(+), 8 deletions(-)
18
1 file changed, 88 insertions(+), 31 deletions(-)
9
19
10
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
20
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/arm/tcg-target.c.inc
22
--- a/tcg/ppc/tcg-target.c.inc
13
+++ b/tcg/arm/tcg-target.c.inc
23
+++ b/tcg/ppc/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ typedef enum {
24
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
15
25
tcg_out32(s, insn);
16
INSN_VORR = 0xf2200110,
17
18
+ INSN_VDUP_G = 0xee800b10, /* VDUP (ARM core register) */
19
+ INSN_VDUP_S = 0xf3b00c00, /* VDUP (scalar) */
20
+ INSN_VLDR_D = 0xed100b00, /* VLDR.64 */
21
INSN_VLD1 = 0xf4200000, /* VLD1 (multiple single elements) */
22
+ INSN_VLD1R = 0xf4a00c00, /* VLD1 (single element to all lanes) */
23
INSN_VST1 = 0xf4000000, /* VST1 (multiple single elements) */
24
+ INSN_VMOVI = 0xf2800010, /* VMOV (immediate) */
25
} ARMInsn;
26
27
#define INSN_NOP (use_armv7_instructions ? INSN_NOP_v6k : INSN_NOP_v4)
28
@@ -XXX,XX +XXX,XX @@ static const uint8_t tcg_cond_to_arm_cond[] = {
29
[TCG_COND_GTU] = COND_HI,
30
};
31
32
+static int encode_imm(uint32_t imm);
33
+
34
+/* TCG private relocation type: add with pc+imm8 */
35
+#define R_ARM_PC8 11
36
+
37
+/* TCG private relocation type: vldr with imm8 << 2 */
38
+#define R_ARM_PC11 12
39
+
40
static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
41
{
42
const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
43
@@ -XXX,XX +XXX,XX @@ static bool reloc_pc13(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
44
return false;
45
}
26
}
46
27
47
+static bool reloc_pc11(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
28
+static inline uint64_t make_pair(tcg_insn_unit i1, tcg_insn_unit i2)
48
+{
29
+{
49
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
30
+ if (HOST_BIG_ENDIAN) {
50
+ ptrdiff_t offset = (tcg_ptr_byte_diff(target, src_rx) - 8) / 4;
31
+ return (uint64_t)i1 << 32 | i2;
51
+
52
+ if (offset >= -0xff && offset <= 0xff) {
53
+ tcg_insn_unit insn = *src_rw;
54
+ bool u = (offset >= 0);
55
+ if (!u) {
56
+ offset = -offset;
57
+ }
58
+ insn = deposit32(insn, 23, 1, u);
59
+ insn = deposit32(insn, 0, 8, offset);
60
+ *src_rw = insn;
61
+ return true;
62
+ }
32
+ }
63
+ return false;
33
+ return (uint64_t)i2 << 32 | i1;
64
+}
34
+}
65
+
35
+
66
+static bool reloc_pc8(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
36
+static inline void ppc64_replace2(uintptr_t rx, uintptr_t rw,
37
+ tcg_insn_unit i0, tcg_insn_unit i1)
67
+{
38
+{
68
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
39
+#if TCG_TARGET_REG_BITS == 64
69
+ ptrdiff_t offset = tcg_ptr_byte_diff(target, src_rx) - 8;
40
+ qatomic_set((uint64_t *)rw, make_pair(i0, i1));
70
+ int rot = encode_imm(offset);
41
+ flush_idcache_range(rx, rw, 8);
71
+
42
+#else
72
+ if (rot >= 0) {
43
+ qemu_build_not_reached();
73
+ *src_rw = deposit32(*src_rw, 0, 12, rol32(offset, rot) | (rot << 7));
44
+#endif
74
+ return true;
75
+ }
76
+ return false;
77
+}
45
+}
78
+
46
+
79
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
47
+static inline void ppc64_replace4(uintptr_t rx, uintptr_t rw,
80
intptr_t value, intptr_t addend)
48
+ tcg_insn_unit i0, tcg_insn_unit i1,
81
{
49
+ tcg_insn_unit i2, tcg_insn_unit i3)
82
tcg_debug_assert(addend == 0);
83
-
84
- if (type == R_ARM_PC24) {
85
+ switch (type) {
86
+ case R_ARM_PC24:
87
return reloc_pc24(code_ptr, (const tcg_insn_unit *)value);
88
- } else if (type == R_ARM_PC13) {
89
+ case R_ARM_PC13:
90
return reloc_pc13(code_ptr, (const tcg_insn_unit *)value);
91
- } else {
92
+ case R_ARM_PC11:
93
+ return reloc_pc11(code_ptr, (const tcg_insn_unit *)value);
94
+ case R_ARM_PC8:
95
+ return reloc_pc8(code_ptr, (const tcg_insn_unit *)value);
96
+ default:
97
g_assert_not_reached();
98
}
99
}
100
@@ -XXX,XX +XXX,XX @@ static inline uint32_t rotl(uint32_t val, int n)
101
102
/* ARM immediates for ALU instructions are made of an unsigned 8-bit
103
right-rotated by an even amount between 0 and 30. */
104
-static inline int encode_imm(uint32_t imm)
105
+static int encode_imm(uint32_t imm)
106
{
107
int shift;
108
109
@@ -XXX,XX +XXX,XX @@ static inline int check_fit_imm(uint32_t imm)
110
return encode_imm(imm) >= 0;
111
}
112
113
+/* Return true if v16 is a valid 16-bit shifted immediate. */
114
+static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
115
+{
50
+{
116
+ if (v16 == (v16 & 0xff)) {
51
+ uint64_t p[2];
117
+ *cmode = 0x8;
52
+
118
+ *imm8 = v16 & 0xff;
53
+ p[!HOST_BIG_ENDIAN] = make_pair(i0, i1);
119
+ return true;
54
+ p[HOST_BIG_ENDIAN] = make_pair(i2, i3);
120
+ } else if (v16 == (v16 & 0xff00)) {
55
+
121
+ *cmode = 0xa;
56
+ /*
122
+ *imm8 = v16 >> 8;
57
+ * There's no convenient way to get the compiler to allocate a pair
123
+ return true;
58
+ * of registers at an even index, so copy into r6/r7 and clobber.
124
+ }
59
+ */
125
+ return false;
60
+ asm("mr %%r6, %1\n\t"
61
+ "mr %%r7, %2\n\t"
62
+ "stq %%r6, %0"
63
+ : "=Q"(*(__int128 *)rw) : "r"(p[0]), "r"(p[1]) : "r6", "r7");
64
+ flush_idcache_range(rx, rw, 16);
126
+}
65
+}
127
+
66
+
128
+/* Return true if v32 is a valid 32-bit shifted immediate. */
67
void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
129
+static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
68
uintptr_t jmp_rw, uintptr_t addr)
130
+{
69
{
131
+ if (v32 == (v32 & 0xff)) {
70
- if (TCG_TARGET_REG_BITS == 64) {
132
+ *cmode = 0x0;
71
- tcg_insn_unit i1, i2;
133
+ *imm8 = v32 & 0xff;
72
- intptr_t tb_diff = addr - tc_ptr;
134
+ return true;
73
- intptr_t br_diff = addr - (jmp_rx + 4);
135
+ } else if (v32 == (v32 & 0xff00)) {
74
- uint64_t pair;
136
+ *cmode = 0x2;
75
+ tcg_insn_unit i0, i1, i2, i3;
137
+ *imm8 = (v32 >> 8) & 0xff;
76
+ intptr_t tb_diff = addr - tc_ptr;
138
+ return true;
77
+ intptr_t br_diff = addr - (jmp_rx + 4);
139
+ } else if (v32 == (v32 & 0xff0000)) {
78
+ intptr_t lo, hi;
140
+ *cmode = 0x4;
79
141
+ *imm8 = (v32 >> 16) & 0xff;
80
- /* This does not exercise the range of the branch, but we do
142
+ return true;
81
- still need to be able to load the new value of TCG_REG_TB.
143
+ } else if (v32 == (v32 & 0xff000000)) {
82
- But this does still happen quite often. */
144
+ *cmode = 0x6;
83
- if (tb_diff == (int16_t)tb_diff) {
145
+ *imm8 = v32 >> 24;
84
- i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
146
+ return true;
85
- i2 = B | (br_diff & 0x3fffffc);
86
- } else {
87
- intptr_t lo = (int16_t)tb_diff;
88
- intptr_t hi = (int32_t)(tb_diff - lo);
89
- assert(tb_diff == hi + lo);
90
- i1 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
91
- i2 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
92
- }
93
-#if HOST_BIG_ENDIAN
94
- pair = (uint64_t)i1 << 32 | i2;
95
-#else
96
- pair = (uint64_t)i2 << 32 | i1;
97
-#endif
98
-
99
- /* As per the enclosing if, this is ppc64. Avoid the _Static_assert
100
- within qatomic_set that would fail to build a ppc32 host. */
101
- qatomic_set__nocheck((uint64_t *)jmp_rw, pair);
102
- flush_idcache_range(jmp_rx, jmp_rw, 8);
103
- } else {
104
+ if (TCG_TARGET_REG_BITS == 32) {
105
intptr_t diff = addr - jmp_rx;
106
tcg_debug_assert(in_range_b(diff));
107
qatomic_set((uint32_t *)jmp_rw, B | (diff & 0x3fffffc));
108
flush_idcache_range(jmp_rx, jmp_rw, 4);
109
+ return;
110
}
111
+
112
+ /*
113
+ * For 16-bit displacements, we can use a single add + branch.
114
+ * This happens quite often.
115
+ */
116
+ if (tb_diff == (int16_t)tb_diff) {
117
+ i0 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
118
+ i1 = B | (br_diff & 0x3fffffc);
119
+ ppc64_replace2(jmp_rx, jmp_rw, i0, i1);
120
+ return;
147
+ }
121
+ }
148
+ return false;
149
+}
150
+
122
+
151
+/* Return true if v32 is a valid 32-bit shifting ones immediate. */
123
+ lo = (int16_t)tb_diff;
152
+static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
124
+ hi = (int32_t)(tb_diff - lo);
153
+{
125
+ assert(tb_diff == hi + lo);
154
+ if ((v32 & 0xffff00ff) == 0xff) {
126
+ i0 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
155
+ *cmode = 0xc;
127
+ i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
156
+ *imm8 = (v32 >> 8) & 0xff;
157
+ return true;
158
+ } else if ((v32 & 0xff00ffff) == 0xffff) {
159
+ *cmode = 0xd;
160
+ *imm8 = (v32 >> 16) & 0xff;
161
+ return true;
162
+ }
163
+ return false;
164
+}
165
+
128
+
166
+/*
129
+ /*
167
+ * Return non-zero if v32 can be formed by MOVI+ORR.
130
+ * Without stq from 2.07, we can only update two insns,
168
+ * Place the parameters for MOVI in (cmode, imm8).
131
+ * and those must be the ones that load the target address.
169
+ * Return the cmode for ORR; the imm8 can be had via extraction from v32.
132
+ */
170
+ */
133
+ if (!have_isa_2_07) {
171
+static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
134
+ ppc64_replace2(jmp_rx, jmp_rw, i0, i1);
172
+{
173
+ int i;
174
+
175
+ for (i = 6; i > 0; i -= 2) {
176
+ /* Mask out one byte we can add with ORR. */
177
+ uint32_t tmp = v32 & ~(0xffu << (i * 4));
178
+ if (is_shimm32(tmp, cmode, imm8) ||
179
+ is_soimm32(tmp, cmode, imm8)) {
180
+ break;
181
+ }
182
+ }
183
+ return i;
184
+}
185
+
186
/* Test if a constant matches the constraint.
187
* TODO: define constraints for:
188
*
189
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vreg3(TCGContext *s, ARMInsn insn, int q, int vece,
190
encode_vd(d) | encode_vn(n) | encode_vm(m));
191
}
192
193
+static void tcg_out_vmovi(TCGContext *s, TCGReg rd,
194
+ int q, int op, int cmode, uint8_t imm8)
195
+{
196
+ tcg_out32(s, INSN_VMOVI | encode_vd(rd) | (q << 6) | (op << 5)
197
+ | (cmode << 8) | extract32(imm8, 0, 4)
198
+ | (extract32(imm8, 4, 3) << 16)
199
+ | (extract32(imm8, 7, 1) << 24));
200
+}
201
+
202
static void tcg_out_vldst(TCGContext *s, ARMInsn insn,
203
TCGReg rd, TCGReg rn, int offset)
204
{
205
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi(TCGContext *s, TCGType type,
206
tcg_out_movi32(s, COND_AL, ret, arg);
207
}
208
209
+/* Type is always V128, with I64 elements. */
210
+static void tcg_out_dup2_vec(TCGContext *s, TCGReg rd, TCGReg rl, TCGReg rh)
211
+{
212
+ /* Move high element into place first. */
213
+ /* VMOV Dd+1, Ds */
214
+ tcg_out_vreg3(s, INSN_VORR | (1 << 12), 0, 0, rd, rh, rh);
215
+ /* Move low element into place; tcg_out_mov will check for nop. */
216
+ tcg_out_mov(s, TCG_TYPE_V64, rd, rl);
217
+}
218
+
219
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
220
TCGReg rd, TCGReg rs)
221
{
222
- g_assert_not_reached();
223
+ int q = type - TCG_TYPE_V64;
224
+
225
+ if (vece == MO_64) {
226
+ if (type == TCG_TYPE_V128) {
227
+ tcg_out_dup2_vec(s, rd, rs, rs);
228
+ } else {
229
+ tcg_out_mov(s, TCG_TYPE_V64, rd, rs);
230
+ }
231
+ } else if (rs < TCG_REG_Q0) {
232
+ int b = (vece == MO_8);
233
+ int e = (vece == MO_16);
234
+ tcg_out32(s, INSN_VDUP_G | (b << 22) | (q << 21) | (e << 5) |
235
+ encode_vn(rd) | (rs << 12));
236
+ } else {
237
+ int imm4 = 1 << vece;
238
+ tcg_out32(s, INSN_VDUP_S | (imm4 << 16) | (q << 6) |
239
+ encode_vd(rd) | encode_vm(rs));
240
+ }
241
+ return true;
242
}
243
244
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
245
TCGReg rd, TCGReg base, intptr_t offset)
246
{
247
- g_assert_not_reached();
248
+ if (vece == MO_64) {
249
+ tcg_out_ld(s, TCG_TYPE_V64, rd, base, offset);
250
+ if (type == TCG_TYPE_V128) {
251
+ tcg_out_dup2_vec(s, rd, rd, rd);
252
+ }
253
+ } else {
254
+ int q = type - TCG_TYPE_V64;
255
+ tcg_out_vldst(s, INSN_VLD1R | (vece << 6) | (q << 5),
256
+ rd, base, offset);
257
+ }
258
+ return true;
259
}
260
261
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
262
TCGReg rd, int64_t v64)
263
{
264
- g_assert_not_reached();
265
+ int q = type - TCG_TYPE_V64;
266
+ int cmode, imm8, i;
267
+
268
+ /* Test all bytes equal first. */
269
+ if (vece == MO_8) {
270
+ tcg_out_vmovi(s, rd, q, 0, 0xe, v64);
271
+ return;
135
+ return;
272
+ }
136
+ }
273
+
137
+
274
+ /*
138
+ /*
275
+ * Test all bytes 0x00 or 0xff second. This can match cases that
139
+ * For 26-bit displacements, we can use a direct branch.
276
+ * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
140
+ * Otherwise we still need the indirect branch, which we
141
+ * must restore after a potential direct branch write.
277
+ */
142
+ */
278
+ for (i = imm8 = 0; i < 8; i++) {
143
+ br_diff -= 4;
279
+ uint8_t byte = v64 >> (i * 8);
144
+ if (in_range_b(br_diff)) {
280
+ if (byte == 0xff) {
145
+ i2 = B | (br_diff & 0x3fffffc);
281
+ imm8 |= 1 << i;
146
+ i3 = NOP;
282
+ } else if (byte != 0) {
147
+ } else {
283
+ goto fail_bytes;
148
+ i2 = MTSPR | RS(TCG_REG_TB) | CTR;
284
+ }
149
+ i3 = BCCTR | BO_ALWAYS;
285
+ }
150
+ }
286
+ tcg_out_vmovi(s, rd, q, 1, 0xe, imm8);
151
+ ppc64_replace4(jmp_rx, jmp_rw, i0, i1, i2, i3);
287
+ return;
288
+ fail_bytes:
289
+
290
+ /*
291
+ * Tests for various replications. For each element width, if we
292
+ * cannot find an expansion there's no point checking a larger
293
+ * width because we already know by replication it cannot match.
294
+ */
295
+ if (vece == MO_16) {
296
+ uint16_t v16 = v64;
297
+
298
+ if (is_shimm16(v16, &cmode, &imm8)) {
299
+ tcg_out_vmovi(s, rd, q, 0, cmode, imm8);
300
+ return;
301
+ }
302
+ if (is_shimm16(~v16, &cmode, &imm8)) {
303
+ tcg_out_vmovi(s, rd, q, 1, cmode, imm8);
304
+ return;
305
+ }
306
+
307
+ /*
308
+ * Otherwise, all remaining constants can be loaded in two insns:
309
+ * rd = v16 & 0xff, rd |= v16 & 0xff00.
310
+ */
311
+ tcg_out_vmovi(s, rd, q, 0, 0x8, v16 & 0xff);
312
+ tcg_out_vmovi(s, rd, q, 0, 0xb, v16 >> 8); /* VORRI */
313
+ return;
314
+ }
315
+
316
+ if (vece == MO_32) {
317
+ uint32_t v32 = v64;
318
+
319
+ if (is_shimm32(v32, &cmode, &imm8) ||
320
+ is_soimm32(v32, &cmode, &imm8)) {
321
+ tcg_out_vmovi(s, rd, q, 0, cmode, imm8);
322
+ return;
323
+ }
324
+ if (is_shimm32(~v32, &cmode, &imm8) ||
325
+ is_soimm32(~v32, &cmode, &imm8)) {
326
+ tcg_out_vmovi(s, rd, q, 1, cmode, imm8);
327
+ return;
328
+ }
329
+
330
+ /*
331
+ * Restrict the set of constants to those we can load with
332
+ * two instructions. Others we load from the pool.
333
+ */
334
+ i = is_shimm32_pair(v32, &cmode, &imm8);
335
+ if (i) {
336
+ tcg_out_vmovi(s, rd, q, 0, cmode, imm8);
337
+ tcg_out_vmovi(s, rd, q, 0, i | 1, extract32(v32, i * 4, 8));
338
+ return;
339
+ }
340
+ i = is_shimm32_pair(~v32, &cmode, &imm8);
341
+ if (i) {
342
+ tcg_out_vmovi(s, rd, q, 1, cmode, imm8);
343
+ tcg_out_vmovi(s, rd, q, 1, i | 1, extract32(~v32, i * 4, 8));
344
+ return;
345
+ }
346
+ }
347
+
348
+ /*
349
+ * As a last resort, load from the constant pool.
350
+ */
351
+ if (!q || vece == MO_64) {
352
+ new_pool_l2(s, R_ARM_PC11, s->code_ptr, 0, v64, v64 >> 32);
353
+ /* VLDR Dd, [pc + offset] */
354
+ tcg_out32(s, INSN_VLDR_D | encode_vd(rd) | (0xf << 16));
355
+ if (q) {
356
+ tcg_out_dup2_vec(s, rd, rd, rd);
357
+ }
358
+ } else {
359
+ new_pool_label(s, (uint32_t)v64, R_ARM_PC8, s->code_ptr, 0);
360
+ /* add tmp, pc, offset */
361
+ tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_TMP, TCG_REG_PC, 0);
362
+ tcg_out_dupm_vec(s, type, MO_32, rd, TCG_REG_TMP, 0);
363
+ }
364
}
152
}
365
153
366
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
154
static void tcg_out_call_int(TCGContext *s, int lk,
155
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
156
if (s->tb_jmp_insn_offset) {
157
/* Direct jump. */
158
if (TCG_TARGET_REG_BITS == 64) {
159
- /* Ensure the next insns are 8-byte aligned. */
160
- if ((uintptr_t)s->code_ptr & 7) {
161
+ /* Ensure the next insns are 8 or 16-byte aligned. */
162
+ while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 15 : 7)) {
163
tcg_out32(s, NOP);
164
}
165
s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
367
--
2.34.1

The value previously chosen overlaps GUSA_MASK.
1
2
3
Rename all DELAY_SLOT_* and GUSA_* defines to emphasize
4
that they are included in TB_FLAGS. Add aliases for the
5
FPSCR and SR bits that are included in TB_FLAGS, so that
6
we don't accidentally reassign those bits.
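
The intended layout also lends itself to build-time checks along the
following lines (an illustrative sketch only, not part of this patch);
with the previous layout the first assertion would have fired in a
user-only build, since TB_FLAG_UNALIGN sat on bit 4 inside GUSA_MASK:

    _Static_assert((TB_FLAG_UNALIGN & TB_FLAG_GUSA_MASK) == 0,
                   "TB_FLAG_UNALIGN overlaps the gUSA byte");
    _Static_assert((TB_FLAG_DELAY_SLOT_MASK & TB_FLAG_GUSA_MASK) == 0,
                   "delay-slot bits overlap the gUSA byte");
    _Static_assert((TB_FLAG_GUSA_MASK & (TB_FLAG_FPSCR_MASK | TB_FLAG_SR_MASK)) == 0,
                   "gUSA byte overlaps the FPSCR/SR aliases");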
7
8
Fixes: 4da06fb3062 ("target/sh4: Implement prctl_unalign_sigbus")
9
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/856
10
Reviewed-by: Yoshinori Sato <ysato@users.sourceforge.jp>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
13
target/sh4/cpu.h | 56 +++++++++++++------------
14
linux-user/sh4/signal.c | 6 +--
15
target/sh4/cpu.c | 6 +--
16
target/sh4/helper.c | 6 +--
17
target/sh4/translate.c | 90 ++++++++++++++++++++++-------------------
18
5 files changed, 88 insertions(+), 76 deletions(-)
19
20
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/target/sh4/cpu.h
23
+++ b/target/sh4/cpu.h
24
@@ -XXX,XX +XXX,XX @@
25
#define FPSCR_RM_NEAREST (0 << 0)
26
#define FPSCR_RM_ZERO (1 << 0)
27
28
-#define DELAY_SLOT_MASK 0x7
29
-#define DELAY_SLOT (1 << 0)
30
-#define DELAY_SLOT_CONDITIONAL (1 << 1)
31
-#define DELAY_SLOT_RTE (1 << 2)
32
+#define TB_FLAG_DELAY_SLOT (1 << 0)
33
+#define TB_FLAG_DELAY_SLOT_COND (1 << 1)
34
+#define TB_FLAG_DELAY_SLOT_RTE (1 << 2)
35
+#define TB_FLAG_PENDING_MOVCA (1 << 3)
36
+#define TB_FLAG_GUSA_SHIFT 4 /* [11:4] */
37
+#define TB_FLAG_GUSA_EXCLUSIVE (1 << 12)
38
+#define TB_FLAG_UNALIGN (1 << 13)
39
+#define TB_FLAG_SR_FD (1 << SR_FD) /* 15 */
40
+#define TB_FLAG_FPSCR_PR FPSCR_PR /* 19 */
41
+#define TB_FLAG_FPSCR_SZ FPSCR_SZ /* 20 */
42
+#define TB_FLAG_FPSCR_FR FPSCR_FR /* 21 */
43
+#define TB_FLAG_SR_RB (1 << SR_RB) /* 29 */
44
+#define TB_FLAG_SR_MD (1 << SR_MD) /* 30 */
45
46
-#define TB_FLAG_PENDING_MOVCA (1 << 3)
47
-#define TB_FLAG_UNALIGN (1 << 4)
48
-
49
-#define GUSA_SHIFT 4
50
-#ifdef CONFIG_USER_ONLY
51
-#define GUSA_EXCLUSIVE (1 << 12)
52
-#define GUSA_MASK ((0xff << GUSA_SHIFT) | GUSA_EXCLUSIVE)
53
-#else
54
-/* Provide dummy versions of the above to allow tests against tbflags
55
- to be elided while avoiding ifdefs. */
56
-#define GUSA_EXCLUSIVE 0
57
-#define GUSA_MASK 0
58
-#endif
59
-
60
-#define TB_FLAG_ENVFLAGS_MASK (DELAY_SLOT_MASK | GUSA_MASK)
61
+#define TB_FLAG_DELAY_SLOT_MASK (TB_FLAG_DELAY_SLOT | \
62
+ TB_FLAG_DELAY_SLOT_COND | \
63
+ TB_FLAG_DELAY_SLOT_RTE)
64
+#define TB_FLAG_GUSA_MASK ((0xff << TB_FLAG_GUSA_SHIFT) | \
65
+ TB_FLAG_GUSA_EXCLUSIVE)
66
+#define TB_FLAG_FPSCR_MASK (TB_FLAG_FPSCR_PR | \
67
+ TB_FLAG_FPSCR_SZ | \
68
+ TB_FLAG_FPSCR_FR)
69
+#define TB_FLAG_SR_MASK (TB_FLAG_SR_FD | \
70
+ TB_FLAG_SR_RB | \
71
+ TB_FLAG_SR_MD)
72
+#define TB_FLAG_ENVFLAGS_MASK (TB_FLAG_DELAY_SLOT_MASK | \
73
+ TB_FLAG_GUSA_MASK)
74
75
typedef struct tlb_t {
76
uint32_t vpn;        /* virtual page number */
77
@@ -XXX,XX +XXX,XX @@ static inline int cpu_mmu_index (CPUSH4State *env, bool ifetch)
78
{
79
/* The instruction in a RTE delay slot is fetched in privileged
80
mode, but executed in user mode. */
81
- if (ifetch && (env->flags & DELAY_SLOT_RTE)) {
82
+ if (ifetch && (env->flags & TB_FLAG_DELAY_SLOT_RTE)) {
83
return 0;
84
} else {
85
return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0;
86
@@ -XXX,XX +XXX,XX @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
87
{
88
*pc = env->pc;
89
/* For a gUSA region, notice the end of the region. */
90
- *cs_base = env->flags & GUSA_MASK ? env->gregs[0] : 0;
91
- *flags = env->flags /* TB_FLAG_ENVFLAGS_MASK: bits 0-2, 4-12 */
92
- | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */
93
- | (env->sr & ((1u << SR_MD) | (1u << SR_RB))) /* Bits 29-30 */
94
- | (env->sr & (1u << SR_FD)) /* Bit 15 */
95
+ *cs_base = env->flags & TB_FLAG_GUSA_MASK ? env->gregs[0] : 0;
96
+ *flags = env->flags
97
+ | (env->fpscr & TB_FLAG_FPSCR_MASK)
98
+ | (env->sr & TB_FLAG_SR_MASK)
99
| (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 3 */
100
#ifdef CONFIG_USER_ONLY
101
*flags |= TB_FLAG_UNALIGN * !env_cpu(env)->prctl_unalign_sigbus;
102
diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
103
index XXXXXXX..XXXXXXX 100644
104
--- a/linux-user/sh4/signal.c
105
+++ b/linux-user/sh4/signal.c
106
@@ -XXX,XX +XXX,XX @@ static void restore_sigcontext(CPUSH4State *regs, struct target_sigcontext *sc)
107
__get_user(regs->fpul, &sc->sc_fpul);
108
109
regs->tra = -1; /* disable syscall checks */
110
- regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
111
+ regs->flags = 0;
112
}
113
114
void setup_frame(int sig, struct target_sigaction *ka,
115
@@ -XXX,XX +XXX,XX @@ void setup_frame(int sig, struct target_sigaction *ka,
116
regs->gregs[5] = 0;
117
regs->gregs[6] = frame_addr += offsetof(typeof(*frame), sc);
118
regs->pc = (unsigned long) ka->_sa_handler;
119
- regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
120
+ regs->flags &= ~(TB_FLAG_DELAY_SLOT_MASK | TB_FLAG_GUSA_MASK);
121
122
unlock_user_struct(frame, frame_addr, 1);
123
return;
124
@@ -XXX,XX +XXX,XX @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
125
regs->gregs[5] = frame_addr + offsetof(typeof(*frame), info);
126
regs->gregs[6] = frame_addr + offsetof(typeof(*frame), uc);
127
regs->pc = (unsigned long) ka->_sa_handler;
128
- regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
129
+ regs->flags &= ~(TB_FLAG_DELAY_SLOT_MASK | TB_FLAG_GUSA_MASK);
130
131
unlock_user_struct(frame, frame_addr, 1);
132
return;
133
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
     SuperHCPU *cpu = SUPERH_CPU(cs);

     cpu->env.pc = tb_pc(tb);
-    cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
+    cpu->env.flags = tb->flags;
 }

 #ifndef CONFIG_USER_ONLY
@@ -XXX,XX +XXX,XX @@ static bool superh_io_recompile_replay_branch(CPUState *cs,
     SuperHCPU *cpu = SUPERH_CPU(cs);
     CPUSH4State *env = &cpu->env;

-    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
+    if ((env->flags & (TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND))
         && env->pc != tb_pc(tb)) {
         env->pc -= 2;
-        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
+        env->flags &= ~(TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND);
         return true;
     }
     return false;
diff --git a/target/sh4/helper.c b/target/sh4/helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sh4/helper.c
+++ b/target/sh4/helper.c
@@ -XXX,XX +XXX,XX @@ void superh_cpu_do_interrupt(CPUState *cs)
     env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB);
     env->lock_addr = -1;

-    if (env->flags & DELAY_SLOT_MASK) {
+    if (env->flags & TB_FLAG_DELAY_SLOT_MASK) {
         /* Branch instruction should be executed again before delay slot. */
         env->spc -= 2;
         /* Clear flags for exception/interrupt routine. */
-        env->flags &= ~DELAY_SLOT_MASK;
+        env->flags &= ~TB_FLAG_DELAY_SLOT_MASK;
     }

     if (do_exp) {
@@ -XXX,XX +XXX,XX @@ bool superh_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
     CPUSH4State *env = &cpu->env;

     /* Delay slots are indivisible, ignore interrupts */
-    if (env->flags & DELAY_SLOT_MASK) {
+    if (env->flags & TB_FLAG_DELAY_SLOT_MASK) {
         return false;
     } else {
         superh_cpu_do_interrupt(cs);
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -XXX,XX +XXX,XX @@ void superh_cpu_dump_state(CPUState *cs, FILE *f, int flags)
             i, env->gregs[i], i + 1, env->gregs[i + 1],
             i + 2, env->gregs[i + 2], i + 3, env->gregs[i + 3]);
     }
-    if (env->flags & DELAY_SLOT) {
+    if (env->flags & TB_FLAG_DELAY_SLOT) {
         qemu_printf("in delay slot (delayed_pc=0x%08x)\n",
             env->delayed_pc);
-    } else if (env->flags & DELAY_SLOT_CONDITIONAL) {
+    } else if (env->flags & TB_FLAG_DELAY_SLOT_COND) {
         qemu_printf("in conditional delay slot (delayed_pc=0x%08x)\n",
             env->delayed_pc);
-    } else if (env->flags & DELAY_SLOT_RTE) {
+    } else if (env->flags & TB_FLAG_DELAY_SLOT_RTE) {
         qemu_fprintf(f, "in rte delay slot (delayed_pc=0x%08x)\n",
             env->delayed_pc);
     }
@@ -XXX,XX +XXX,XX @@ static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc)

 static inline bool use_exit_tb(DisasContext *ctx)
 {
-    return (ctx->tbflags & GUSA_EXCLUSIVE) != 0;
+    return (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) != 0;
 }

 static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
@@ -XXX,XX +XXX,XX @@ static void gen_conditional_jump(DisasContext *ctx, target_ulong dest,
     TCGLabel *l1 = gen_new_label();
     TCGCond cond_not_taken = jump_if_true ? TCG_COND_EQ : TCG_COND_NE;

-    if (ctx->tbflags & GUSA_EXCLUSIVE) {
+    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
         /* When in an exclusive region, we must continue to the end.
            Therefore, exit the region on a taken branch, but otherwise
            fall through to the next instruction. */
         tcg_gen_brcondi_i32(cond_not_taken, cpu_sr_t, 0, l1);
-        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
+        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
         /* Note that this won't actually use a goto_tb opcode because we
            disallow it in use_goto_tb, but it handles exit + singlestep. */
         gen_goto_tb(ctx, 0, dest);
@@ -XXX,XX +XXX,XX @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
     tcg_gen_mov_i32(ds, cpu_delayed_cond);
     tcg_gen_discard_i32(cpu_delayed_cond);

-    if (ctx->tbflags & GUSA_EXCLUSIVE) {
+    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
         /* When in an exclusive region, we must continue to the end.
            Therefore, exit the region on a taken branch, but otherwise
            fall through to the next instruction. */
         tcg_gen_brcondi_i32(TCG_COND_EQ, ds, 0, l1);

         /* Leave the gUSA region. */
-        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
+        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
         gen_jump(ctx);

         gen_set_label(l1);
@@ -XXX,XX +XXX,XX @@ static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))

 #define CHECK_NOT_DELAY_SLOT \
-    if (ctx->envflags & DELAY_SLOT_MASK) { \
-        goto do_illegal_slot; \
+    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {  \
+        goto do_illegal_slot;                       \
     }

 #define CHECK_PRIVILEGED \
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
     case 0x000b:        /* rts */
         CHECK_NOT_DELAY_SLOT
         tcg_gen_mov_i32(cpu_delayed_pc, cpu_pr);
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
         ctx->delayed_pc = (uint32_t) - 1;
         return;
     case 0x0028:        /* clrmac */
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
         CHECK_NOT_DELAY_SLOT
         gen_write_sr(cpu_ssr);
         tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
-        ctx->envflags |= DELAY_SLOT_RTE;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT_RTE;
         ctx->delayed_pc = (uint32_t) - 1;
         ctx->base.is_jmp = DISAS_STOP;
         return;
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
         return;
     case 0xe000:        /* mov #imm,Rn */
 #ifdef CONFIG_USER_ONLY
-        /* Detect the start of a gUSA region.  If so, update envflags
-           and end the TB.  This will allow us to see the end of the
-           region (stored in R0) in the next TB.  */
+        /*
+         * Detect the start of a gUSA region (mov #-n, r15).
+         * If so, update envflags and end the TB.  This will allow us
+         * to see the end of the region (stored in R0) in the next TB.
+         */
         if (B11_8 == 15 && B7_0s < 0 &&
             (tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
-            ctx->envflags = deposit32(ctx->envflags, GUSA_SHIFT, 8, B7_0s);
+            ctx->envflags =
+                deposit32(ctx->envflags, TB_FLAG_GUSA_SHIFT, 8, B7_0s);
             ctx->base.is_jmp = DISAS_STOP;
         }
 #endif
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
     case 0xa000:        /* bra disp */
         CHECK_NOT_DELAY_SLOT
         ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
         return;
     case 0xb000:        /* bsr disp */
         CHECK_NOT_DELAY_SLOT
         tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
         ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
         return;
     }

@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
         CHECK_NOT_DELAY_SLOT
         tcg_gen_xori_i32(cpu_delayed_cond, cpu_sr_t, 1);
         ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
-        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
         return;
     case 0x8900:        /* bt label */
         CHECK_NOT_DELAY_SLOT
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
         CHECK_NOT_DELAY_SLOT
         tcg_gen_mov_i32(cpu_delayed_cond, cpu_sr_t);
         ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
-        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
         return;
     case 0x8800:        /* cmp/eq #imm,R0 */
         tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(0), B7_0s);
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
     case 0x0023:        /* braf Rn */
         CHECK_NOT_DELAY_SLOT
         tcg_gen_addi_i32(cpu_delayed_pc, REG(B11_8), ctx->base.pc_next + 4);
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
         ctx->delayed_pc = (uint32_t) - 1;
         return;
     case 0x0003:        /* bsrf Rn */
         CHECK_NOT_DELAY_SLOT
         tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
         tcg_gen_add_i32(cpu_delayed_pc, REG(B11_8), cpu_pr);
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
         ctx->delayed_pc = (uint32_t) - 1;
         return;
     case 0x4015:        /* cmp/pl Rn */
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
     case 0x402b:        /* jmp @Rn */
         CHECK_NOT_DELAY_SLOT
         tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
         ctx->delayed_pc = (uint32_t) - 1;
         return;
     case 0x400b:        /* jsr @Rn */
         CHECK_NOT_DELAY_SLOT
         tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
         tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
         ctx->delayed_pc = (uint32_t) - 1;
         return;
     case 0x400e:        /* ldc Rm,SR */
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
     fflush(stderr);
 #endif
  do_illegal:
-    if (ctx->envflags & DELAY_SLOT_MASK) {
+    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
  do_illegal_slot:
         gen_save_cpu_state(ctx, true);
         gen_helper_raise_slot_illegal_instruction(cpu_env);
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)

  do_fpu_disabled:
     gen_save_cpu_state(ctx, true);
-    if (ctx->envflags & DELAY_SLOT_MASK) {
+    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
         gen_helper_raise_slot_fpu_disable(cpu_env);
     } else {
         gen_helper_raise_fpu_disable(cpu_env);
@@ -XXX,XX +XXX,XX @@ static void decode_opc(DisasContext * ctx)

     _decode_opc(ctx);

-    if (old_flags & DELAY_SLOT_MASK) {
+    if (old_flags & TB_FLAG_DELAY_SLOT_MASK) {
         /* go out of the delay slot */
-        ctx->envflags &= ~DELAY_SLOT_MASK;
+        ctx->envflags &= ~TB_FLAG_DELAY_SLOT_MASK;

         /* When in an exclusive region, we must continue to the end
            for conditional branches.  */
-        if (ctx->tbflags & GUSA_EXCLUSIVE
-            && old_flags & DELAY_SLOT_CONDITIONAL) {
+        if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE
+            && old_flags & TB_FLAG_DELAY_SLOT_COND) {
             gen_delayed_conditional_jump(ctx);
             return;
         }
         /* Otherwise this is probably an invalid gUSA region.
            Drop the GUSA bits so the next TB doesn't see them.  */
-        ctx->envflags &= ~GUSA_MASK;
+        ctx->envflags &= ~TB_FLAG_GUSA_MASK;

         tcg_gen_movi_i32(cpu_flags, ctx->envflags);
-        if (old_flags & DELAY_SLOT_CONDITIONAL) {
+        if (old_flags & TB_FLAG_DELAY_SLOT_COND) {
             gen_delayed_conditional_jump(ctx);
         } else {
             gen_jump(ctx);
@@ -XXX,XX +XXX,XX @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
     }

     /* The entire region has been translated.  */
-    ctx->envflags &= ~GUSA_MASK;
+    ctx->envflags &= ~TB_FLAG_GUSA_MASK;
     ctx->base.pc_next = pc_end;
     ctx->base.num_insns += max_insns - 1;
     return;
@@ -XXX,XX +XXX,XX @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)

     /* Restart with the EXCLUSIVE bit set, within a TB run via
        cpu_exec_step_atomic holding the exclusive lock.  */
-    ctx->envflags |= GUSA_EXCLUSIVE;
+    ctx->envflags |= TB_FLAG_GUSA_EXCLUSIVE;
     gen_save_cpu_state(ctx, false);
     gen_helper_exclusive(cpu_env);
     ctx->base.is_jmp = DISAS_NORETURN;
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
                   (tbflags & (1 << SR_RB))) * 0x10;
     ctx->fbank = tbflags & FPSCR_FR ? 0x10 : 0;

-    if (tbflags & GUSA_MASK) {
+#ifdef CONFIG_USER_ONLY
+    if (tbflags & TB_FLAG_GUSA_MASK) {
+        /* In gUSA exclusive region. */
         uint32_t pc = ctx->base.pc_next;
         uint32_t pc_end = ctx->base.tb->cs_base;
-        int backup = sextract32(ctx->tbflags, GUSA_SHIFT, 8);
+        int backup = sextract32(ctx->tbflags, TB_FLAG_GUSA_SHIFT, 8);
         int max_insns = (pc_end - pc) / 2;

         if (pc != pc_end + backup || max_insns < 2) {
             /* This is a malformed gUSA region.  Don't do anything special,
                since the interpreter is likely to get confused.  */
-            ctx->envflags &= ~GUSA_MASK;
-        } else if (tbflags & GUSA_EXCLUSIVE) {
+            ctx->envflags &= ~TB_FLAG_GUSA_MASK;
+        } else if (tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
             /* Regardless of single-stepping or the end of the page,
                we must complete execution of the gUSA region while
                holding the exclusive lock.  */
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
             return;
         }
     }
+#endif

     /* Since the ISA is fixed-width, we can bound by the number
        of instructions remaining on the page.  */
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
     DisasContext *ctx = container_of(dcbase, DisasContext, base);

 #ifdef CONFIG_USER_ONLY
-    if (unlikely(ctx->envflags & GUSA_MASK)
-        && !(ctx->envflags & GUSA_EXCLUSIVE)) {
+    if (unlikely(ctx->envflags & TB_FLAG_GUSA_MASK)
+        && !(ctx->envflags & TB_FLAG_GUSA_EXCLUSIVE)) {
         /* We're in an gUSA region, and we have not already fallen
            back on using an exclusive region.  Attempt to parse the
            region into a single supported atomic operation.  Failure
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
 {
     DisasContext *ctx = container_of(dcbase, DisasContext, base);

-    if (ctx->tbflags & GUSA_EXCLUSIVE) {
+    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
         /* Ending the region of exclusivity.  Clear the bits.  */
-        ctx->envflags &= ~GUSA_MASK;
+        ctx->envflags &= ~TB_FLAG_GUSA_MASK;
     }

     switch (ctx->base.is_jmp) {
--
2.34.1