Old series cover letter (pull-tcg-20200121):

The following changes since commit 3e08b2b9cb64bff2b73fa9128c0e49bfcde0dd40:

  Merge remote-tracking branch 'remotes/philmd-gitlab/tags/edk2-next-20200121' into staging (2020-01-21 15:29:25 +0000)

are available in the Git repository at:

  https://github.com/rth7680/qemu.git tags/pull-tcg-20200121

for you to fetch changes up to 75fa376cdab5e5db2c7fdd107358e16f95503ac6:

  scripts/git.orderfile: Display decodetree before C source (2020-01-21 15:26:09 -1000)

----------------------------------------------------------------
Remove another limit to NB_MMU_MODES.
Fix compilation using uclibc.
Fix defaulting of -accel parameters.
Tidy cputlb basic routines.
Adjust git.orderfile for decodetree.

----------------------------------------------------------------
Carlos Santos (1):
      util/cacheinfo: fix crash when compiling with uClibc

Philippe Mathieu-Daudé (1):
      scripts/git.orderfile: Display decodetree before C source

Richard Henderson (14):
      cputlb: Handle NB_MMU_MODES > TARGET_PAGE_BITS_MIN
      vl: Remove unused variable in configure_accelerators
      vl: Reduce scope of variables in configure_accelerators
      vl: Remove useless test in configure_accelerators
      vl: Only choose enabled accelerators in configure_accelerators
      cputlb: Merge tlb_table_flush_by_mmuidx into tlb_flush_one_mmuidx_locked
      cputlb: Make tlb_n_entries private to cputlb.c
      cputlb: Pass CPUTLBDescFast to tlb_n_entries and sizeof_tlb
      cputlb: Hoist tlb portions in tlb_mmu_resize_locked
      cputlb: Hoist tlb portions in tlb_flush_one_mmuidx_locked
      cputlb: Split out tlb_mmu_flush_locked
      cputlb: Partially merge tlb_dyn_init into tlb_init
      cputlb: Initialize tlbs as flushed
      cputlb: Hoist timestamp outside of loops over tlbs

 include/exec/cpu_ldst.h |   5 -
 accel/tcg/cputlb.c      | 287 +++++++++++++++++++++++++++++++++---------------
 util/cacheinfo.c        |  10 +-
 vl.c                    |  27 +++--
 scripts/git.orderfile   |   3 +
 5 files changed, 223 insertions(+), 109 deletions(-)

New series cover letter (pull-tcg-20230502):

The following changes since commit 7c18f2d663521f1b31b821a13358ce38075eaf7d:

  Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging (2023-04-29 23:07:17 +0100)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230502

for you to fetch changes up to bdc7fba1c5a29ae218b45353daac9308fe1aae82:

  tcg: Introduce tcg_out_movext2 (2023-05-02 12:15:41 +0100)

----------------------------------------------------------------
Misc tcg-related patch queue.

----------------------------------------------------------------
Dickon Hood (1):
      qemu/bitops.h: Limit rotate amounts

Kiran Ostrolenk (1):
      qemu/host-utils.h: Add clz and ctz functions for lower-bit integers

Nazar Kazakov (2):
      tcg: Add tcg_gen_gvec_andcs
      tcg: Add tcg_gen_gvec_rotrs

Richard Henderson (7):
      softmmu: Tidy dirtylimit_dirty_ring_full_time
      qemu/int128: Re-shuffle Int128Alias members
      migration/xbzrle: Use __attribute__((target)) for avx512
      accel/tcg: Add cpu_ld*_code_mmu
      tcg/loongarch64: Conditionalize tcg_out_exts_i32_i64
      tcg/mips: Conditionalize tcg_out_exts_i32_i64
      tcg: Introduce tcg_out_movext2

Weiwei Li (1):
      accel/tcg: Uncache the host address for instruction fetch when tlb size < 1

 meson.build                      |  5 +--
 accel/tcg/tcg-runtime.h          |  1 +
 include/exec/cpu_ldst.h          |  9 ++++++
 include/qemu/bitops.h            | 24 +++++++++-----
 include/qemu/host-utils.h        | 54 +++++++++++++++++++++++++++++++
 include/qemu/int128.h            |  4 +--
 include/tcg/tcg-op-gvec.h        |  4 +++
 accel/tcg/cputlb.c               | 53 ++++++++++++++++++++++++++++++
 accel/tcg/tcg-runtime-gvec.c     | 11 +++++++
 accel/tcg/user-exec.c            | 58 +++++++++++++++++++++++++++++++++
 migration/xbzrle.c               |  9 +++---
 softmmu/dirtylimit.c             | 15 ++++++---
 tcg/tcg-op-gvec.c                | 28 ++++++++++++++++
 tcg/tcg.c                        | 69 +++++++++++++++++++++++++++++++++++++---
 tcg/arm/tcg-target.c.inc         | 44 +++++++++++--------------
 tcg/i386/tcg-target.c.inc        | 19 +++++------
 tcg/loongarch64/tcg-target.c.inc |  4 ++-
 tcg/mips/tcg-target.c.inc        |  4 ++-
 18 files changed, 347 insertions(+), 68 deletions(-)
Old series patch:

No functional change, but the smaller expressions make
the code easier to read.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

New series patch:

Drop inline marker: let compiler decide.

Change return type to uint64_t: this matches the computation in the
return statement and the local variable assignment in the caller.

Rename local to dirty_ring_size_MB to fix typo.
Simplify conversion to MiB via qemu_target_page_bits and right shift.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 softmmu/dirtylimit.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)
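As a quick sanity check of the pages-to-MiB conversion described above: the
ring size and page size below are made-up illustrative values, not taken
from any particular configuration.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical values: a 65536-entry KVM dirty ring, 4 KiB pages. */
        uint64_t ring_entries = 65536;
        unsigned target_page_bits = 12;

        /* Pages -> MiB with one right shift, as in the patch; equivalent
         * to (ring_entries << target_page_bits) >> 20. */
        uint64_t ring_mb = ring_entries >> (20 - target_page_bits);

        printf("%" PRIu64 " MiB\n", ring_mb);   /* prints "256 MiB" */
        return 0;
    }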
12
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
17
diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
13
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
19
--- a/softmmu/dirtylimit.c
15
+++ b/accel/tcg/cputlb.c
20
+++ b/softmmu/dirtylimit.c
16
@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
21
@@ -XXX,XX +XXX,XX @@ bool dirtylimit_vcpu_index_valid(int cpu_index)
17
22
cpu_index >= ms->smp.max_cpus);
18
/**
23
}
19
* tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
24
20
- * @env: CPU that owns the TLB
25
-static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
21
- * @mmu_idx: MMU index of the TLB
26
+static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
22
+ * @desc: The CPUTLBDesc portion of the TLB
23
+ * @fast: The CPUTLBDescFast portion of the same TLB
24
*
25
* Called with tlb_lock_held.
26
*
27
@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
28
* high), since otherwise we are likely to have a significant amount of
29
* conflict misses.
30
*/
31
-static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
32
+static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
33
{
27
{
34
- CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
28
static uint64_t max_dirtyrate;
35
- size_t old_size = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
29
- uint32_t dirty_ring_size = kvm_dirty_ring_size();
36
+ size_t old_size = tlb_n_entries(fast);
30
- uint64_t dirty_ring_size_meory_MB =
37
size_t rate;
31
- dirty_ring_size * qemu_target_page_size() >> 20;
38
size_t new_size = old_size;
32
+ unsigned target_page_bits = qemu_target_page_bits();
39
int64_t now = get_clock_realtime();
33
+ uint64_t dirty_ring_size_MB;
40
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
34
+
41
return;
35
+ /* So far, the largest (non-huge) page size is 64k, i.e. 16 bits. */
36
+ assert(target_page_bits < 20);
37
+
38
+ /* Convert ring size (pages) to MiB (2**20). */
39
+ dirty_ring_size_MB = kvm_dirty_ring_size() >> (20 - target_page_bits);
40
41
if (max_dirtyrate < dirtyrate) {
42
max_dirtyrate = dirtyrate;
42
}
43
}
43
44
44
- g_free(env_tlb(env)->f[mmu_idx].table);
45
- return dirty_ring_size_meory_MB * 1000000 / max_dirtyrate;
45
- g_free(env_tlb(env)->d[mmu_idx].iotlb);
46
+ return dirty_ring_size_MB * 1000000 / max_dirtyrate;
46
+ g_free(fast->table);
47
+ g_free(desc->iotlb);
48
49
tlb_window_reset(desc, now, 0);
50
/* desc->n_used_entries is cleared by the caller */
51
- env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
52
- env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
53
- env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
54
+ fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
55
+ fast->table = g_try_new(CPUTLBEntry, new_size);
56
+ desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
57
+
58
/*
59
* If the allocations fail, try smaller sizes. We just freed some
60
* memory, so going back to half of new_size has a good chance of working.
61
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
62
* allocations to fail though, so we progressively reduce the allocation
63
* size, aborting if we cannot even allocate the smallest TLB we support.
64
*/
65
- while (env_tlb(env)->f[mmu_idx].table == NULL ||
66
- env_tlb(env)->d[mmu_idx].iotlb == NULL) {
67
+ while (fast->table == NULL || desc->iotlb == NULL) {
68
if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
69
error_report("%s: %s", __func__, strerror(errno));
70
abort();
71
}
72
new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
73
- env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
74
+ fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
75
76
- g_free(env_tlb(env)->f[mmu_idx].table);
77
- g_free(env_tlb(env)->d[mmu_idx].iotlb);
78
- env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
79
- env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
80
+ g_free(fast->table);
81
+ g_free(desc->iotlb);
82
+ fast->table = g_try_new(CPUTLBEntry, new_size);
83
+ desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
84
}
85
}
47
}
86
48
87
static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
49
static inline bool dirtylimit_done(uint64_t quota,
88
{
89
- tlb_mmu_resize_locked(env, mmu_idx);
90
+ tlb_mmu_resize_locked(&env_tlb(env)->d[mmu_idx], &env_tlb(env)->f[mmu_idx]);
91
env_tlb(env)->d[mmu_idx].n_used_entries = 0;
92
env_tlb(env)->d[mmu_idx].large_page_addr = -1;
93
env_tlb(env)->d[mmu_idx].large_page_mask = -1;
94
--
50
--
95
2.20.1
51
2.34.1
96
52
97
53
Old series patch:

Do not call get_clock_realtime() in tlb_mmu_resize_locked,
but hoist outside of any loop over a set of tlbs. There are
only two (indirect) callers, tlb_flush_by_mmuidx_async_work
and tlb_flush_page_locked, so this is not onerous.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 14 ++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

New series patch:

From: Weiwei Li <liweiwei@iscas.ac.cn>

When a PMP entry overlaps part of the page, we set the tlb_size to 1,
so the address in the TLB entry is set with TLB_INVALID_MASK and the
next access goes through tlb_fill again. However, this does not work
in tb_gen_code() => get_page_addr_code_hostp(): the TLB host address
is cached, and the following instructions can use this host address
directly, which may bypass the PMP-related checks.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1542

Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230422130329.23555-6-liweiwei@iscas.ac.cn>
---
 accel/tcg/cputlb.c | 5 +++++
 1 file changed, 5 insertions(+)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
20
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
15
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
16
--- a/accel/tcg/cputlb.c
22
--- a/accel/tcg/cputlb.c
17
+++ b/accel/tcg/cputlb.c
23
+++ b/accel/tcg/cputlb.c
18
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
24
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
19
* high), since otherwise we are likely to have a significant amount of
25
if (p == NULL) {
20
* conflict misses.
26
return -1;
21
*/
22
-static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
23
+static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
24
+ int64_t now)
25
{
26
size_t old_size = tlb_n_entries(fast);
27
size_t rate;
28
size_t new_size = old_size;
29
- int64_t now = get_clock_realtime();
30
int64_t window_len_ms = 100;
31
int64_t window_len_ns = window_len_ms * 1000 * 1000;
32
bool window_expired = now > desc->window_begin_ns + window_len_ns;
33
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
34
memset(desc->vtable, -1, sizeof(desc->vtable));
35
}
36
37
-static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
38
+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
39
+ int64_t now)
40
{
41
CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
42
CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
43
44
- tlb_mmu_resize_locked(desc, fast);
45
+ tlb_mmu_resize_locked(desc, fast, now);
46
tlb_mmu_flush_locked(desc, fast);
47
}
48
49
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
50
CPUArchState *env = cpu->env_ptr;
51
uint16_t asked = data.host_int;
52
uint16_t all_dirty, work, to_clean;
53
+ int64_t now = get_clock_realtime();
54
55
assert_cpu_is_self(cpu);
56
57
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
58
59
for (work = to_clean; work != 0; work &= work - 1) {
60
int mmu_idx = ctz32(work);
61
- tlb_flush_one_mmuidx_locked(env, mmu_idx);
62
+ tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
63
}
27
}
64
28
+
65
qemu_spin_unlock(&env_tlb(env)->c.lock);
29
+ if (full->lg_page_size < TARGET_PAGE_BITS) {
66
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_locked(CPUArchState *env, int midx,
30
+ return -1;
67
tlb_debug("forcing full flush midx %d ("
31
+ }
68
TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
32
+
69
midx, lp_addr, lp_mask);
33
if (hostp) {
70
- tlb_flush_one_mmuidx_locked(env, midx);
34
*hostp = p;
71
+ tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
35
}
72
} else {
73
if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
74
tlb_n_used_entries_dec(env, midx);
75
--
36
--
76
2.20.1
37
2.34.1
77
78
Old series patch:

In target/arm we will shortly have "too many" mmu_idx.
The current minimum barrier is caused by the way in which
tlb_flush_page_by_mmuidx is coded.

We can remove this limitation by allocating memory for
consumption by the worker. Let us assume that this is
the unlikely case, as will be the case for the majority
of targets which have so far satisfied the BUILD_BUG_ON,
and only allocate memory when necessary.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 167 +++++++++++++++++++++++++++++++++----------
 1 file changed, 132 insertions(+), 35 deletions(-)

New series patch:

From: Dickon Hood <dickon.hood@codethink.co.uk>

Rotates have been fixed up to only allow for reasonable rotate amounts
(ie, no rotates >7 on an 8b value etc.) This fixes a problem with riscv
vector rotate instructions.

Signed-off-by: Dickon Hood <dickon.hood@codethink.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230428144757.57530-9-lawrence.hunter@codethink.co.uk>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/qemu/bitops.h | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)
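To make the effect of the masking concrete, here is a standalone copy of
the 32-bit rotate as it looks after this patch, exercised with a made-up
out-of-range rotate amount; it is only an illustration, not the QEMU
header itself.

    #include <stdint.h>
    #include <stdio.h>

    /* Standalone copy of the patched rol32, for illustration only. */
    static uint32_t rol32(uint32_t word, unsigned int shift)
    {
        shift &= 31;
        return (word << shift) | (word >> (32 - shift));
    }

    int main(void)
    {
        /* An oversized amount now wraps modulo the element width:
         * rotating by 35 behaves like rotating by 3, instead of
         * performing an out-of-range 32-bit shift. */
        printf("%08x\n", rol32(0x80000001u, 35));  /* 0000000c */
        printf("%08x\n", rol32(0x80000001u, 3));   /* 0000000c */
        return 0;
    }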
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
15
diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
18
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
19
--- a/accel/tcg/cputlb.c
17
--- a/include/qemu/bitops.h
20
+++ b/accel/tcg/cputlb.c
18
+++ b/include/qemu/bitops.h
21
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_locked(CPUArchState *env, int midx,
19
@@ -XXX,XX +XXX,XX @@ static inline unsigned long find_first_zero_bit(const unsigned long *addr,
22
}
20
*/
21
static inline uint8_t rol8(uint8_t word, unsigned int shift)
22
{
23
- return (word << shift) | (word >> ((8 - shift) & 7));
24
+ shift &= 7;
25
+ return (word << shift) | (word >> (8 - shift));
23
}
26
}
24
27
25
-/* As we are going to hijack the bottom bits of the page address for a
28
/**
26
- * mmuidx bit mask we need to fail to build if we can't do that
29
@@ -XXX,XX +XXX,XX @@ static inline uint8_t rol8(uint8_t word, unsigned int shift)
27
+/**
28
+ * tlb_flush_page_by_mmuidx_async_0:
29
+ * @cpu: cpu on which to flush
30
+ * @addr: page of virtual address to flush
31
+ * @idxmap: set of mmu_idx to flush
32
+ *
33
+ * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
34
+ * at @addr from the tlbs indicated by @idxmap from @cpu.
35
*/
30
*/
36
-QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
31
static inline uint8_t ror8(uint8_t word, unsigned int shift)
37
-
38
-static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
39
- run_on_cpu_data data)
40
+static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
41
+ target_ulong addr,
42
+ uint16_t idxmap)
43
{
32
{
44
CPUArchState *env = cpu->env_ptr;
33
- return (word >> shift) | (word << ((8 - shift) & 7));
45
- target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
34
+ shift &= 7;
46
- target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
35
+ return (word >> shift) | (word << (8 - shift));
47
- unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
48
int mmu_idx;
49
50
assert_cpu_is_self(cpu);
51
52
- tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
53
- addr, mmu_idx_bitmap);
54
+ tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);
55
56
qemu_spin_lock(&env_tlb(env)->c.lock);
57
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
58
- if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
59
+ if ((idxmap >> mmu_idx) & 1) {
60
tlb_flush_page_locked(env, mmu_idx, addr);
61
}
62
}
63
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
64
tb_flush_jmp_cache(cpu, addr);
65
}
36
}
66
37
67
+/**
38
/**
68
+ * tlb_flush_page_by_mmuidx_async_1:
39
@@ -XXX,XX +XXX,XX @@ static inline uint8_t ror8(uint8_t word, unsigned int shift)
69
+ * @cpu: cpu on which to flush
40
*/
70
+ * @data: encoded addr + idxmap
41
static inline uint16_t rol16(uint16_t word, unsigned int shift)
71
+ *
72
+ * Helper for tlb_flush_page_by_mmuidx and friends, called through
73
+ * async_run_on_cpu. The idxmap parameter is encoded in the page
74
+ * offset of the target_ptr field. This limits the set of mmu_idx
75
+ * that can be passed via this method.
76
+ */
77
+static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
78
+ run_on_cpu_data data)
79
+{
80
+ target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
81
+ target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
82
+ uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
83
+
84
+ tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
85
+}
86
+
87
+typedef struct {
88
+ target_ulong addr;
89
+ uint16_t idxmap;
90
+} TLBFlushPageByMMUIdxData;
91
+
92
+/**
93
+ * tlb_flush_page_by_mmuidx_async_2:
94
+ * @cpu: cpu on which to flush
95
+ * @data: allocated addr + idxmap
96
+ *
97
+ * Helper for tlb_flush_page_by_mmuidx and friends, called through
98
+ * async_run_on_cpu. The addr+idxmap parameters are stored in a
99
+ * TLBFlushPageByMMUIdxData structure that has been allocated
100
+ * specifically for this helper. Free the structure when done.
101
+ */
102
+static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
103
+ run_on_cpu_data data)
104
+{
105
+ TLBFlushPageByMMUIdxData *d = data.host_ptr;
106
+
107
+ tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
108
+ g_free(d);
109
+}
110
+
111
void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
112
{
42
{
113
- target_ulong addr_and_mmu_idx;
43
- return (word << shift) | (word >> ((16 - shift) & 15));
114
-
44
+ shift &= 15;
115
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
45
+ return (word << shift) | (word >> (16 - shift));
116
117
/* This should already be page aligned */
118
- addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
119
- addr_and_mmu_idx |= idxmap;
120
+ addr &= TARGET_PAGE_MASK;
121
122
- if (!qemu_cpu_is_self(cpu)) {
123
- async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
124
- RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
125
+ if (qemu_cpu_is_self(cpu)) {
126
+ tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
127
+ } else if (idxmap < TARGET_PAGE_SIZE) {
128
+ /*
129
+ * Most targets have only a few mmu_idx. In the case where
130
+ * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
131
+ * allocating memory for this operation.
132
+ */
133
+ async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
134
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
135
} else {
136
- tlb_flush_page_by_mmuidx_async_work(
137
- cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
138
+ TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);
139
+
140
+ /* Otherwise allocate a structure, freed by the worker. */
141
+ d->addr = addr;
142
+ d->idxmap = idxmap;
143
+ async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
144
+ RUN_ON_CPU_HOST_PTR(d));
145
}
146
}
46
}
147
47
148
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
48
/**
149
void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
49
@@ -XXX,XX +XXX,XX @@ static inline uint16_t rol16(uint16_t word, unsigned int shift)
150
uint16_t idxmap)
50
*/
51
static inline uint16_t ror16(uint16_t word, unsigned int shift)
151
{
52
{
152
- const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
53
- return (word >> shift) | (word << ((16 - shift) & 15));
153
- target_ulong addr_and_mmu_idx;
54
+ shift &= 15;
154
-
55
+ return (word >> shift) | (word << (16 - shift));
155
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
156
157
/* This should already be page aligned */
158
- addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
159
- addr_and_mmu_idx |= idxmap;
160
+ addr &= TARGET_PAGE_MASK;
161
162
- flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
163
- fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
164
+ /*
165
+ * Allocate memory to hold addr+idxmap only when needed.
166
+ * See tlb_flush_page_by_mmuidx for details.
167
+ */
168
+ if (idxmap < TARGET_PAGE_SIZE) {
169
+ flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
170
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
171
+ } else {
172
+ CPUState *dst_cpu;
173
+
174
+ /* Allocate a separate data block for each destination cpu. */
175
+ CPU_FOREACH(dst_cpu) {
176
+ if (dst_cpu != src_cpu) {
177
+ TLBFlushPageByMMUIdxData *d
178
+ = g_new(TLBFlushPageByMMUIdxData, 1);
179
+
180
+ d->addr = addr;
181
+ d->idxmap = idxmap;
182
+ async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
183
+ RUN_ON_CPU_HOST_PTR(d));
184
+ }
185
+ }
186
+ }
187
+
188
+ tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
189
}
56
}
190
57
191
void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
58
/**
192
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
59
@@ -XXX,XX +XXX,XX @@ static inline uint16_t ror16(uint16_t word, unsigned int shift)
193
target_ulong addr,
60
*/
194
uint16_t idxmap)
61
static inline uint32_t rol32(uint32_t word, unsigned int shift)
195
{
62
{
196
- const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
63
- return (word << shift) | (word >> ((32 - shift) & 31));
197
- target_ulong addr_and_mmu_idx;
64
+ shift &= 31;
198
-
65
+ return (word << shift) | (word >> (32 - shift));
199
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
200
201
/* This should already be page aligned */
202
- addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
203
- addr_and_mmu_idx |= idxmap;
204
+ addr &= TARGET_PAGE_MASK;
205
206
- flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
207
- async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
208
+ /*
209
+ * Allocate memory to hold addr+idxmap only when needed.
210
+ * See tlb_flush_page_by_mmuidx for details.
211
+ */
212
+ if (idxmap < TARGET_PAGE_SIZE) {
213
+ flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
214
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
215
+ async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
216
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
217
+ } else {
218
+ CPUState *dst_cpu;
219
+ TLBFlushPageByMMUIdxData *d;
220
+
221
+ /* Allocate a separate data block for each destination cpu. */
222
+ CPU_FOREACH(dst_cpu) {
223
+ if (dst_cpu != src_cpu) {
224
+ d = g_new(TLBFlushPageByMMUIdxData, 1);
225
+ d->addr = addr;
226
+ d->idxmap = idxmap;
227
+ async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
228
+ RUN_ON_CPU_HOST_PTR(d));
229
+ }
230
+ }
231
+
232
+ d = g_new(TLBFlushPageByMMUIdxData, 1);
233
+ d->addr = addr;
234
+ d->idxmap = idxmap;
235
+ async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
236
+ RUN_ON_CPU_HOST_PTR(d));
237
+ }
238
}
66
}
239
67
240
void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
68
/**
69
@@ -XXX,XX +XXX,XX @@ static inline uint32_t rol32(uint32_t word, unsigned int shift)
70
*/
71
static inline uint32_t ror32(uint32_t word, unsigned int shift)
72
{
73
- return (word >> shift) | (word << ((32 - shift) & 31));
74
+ shift &= 31;
75
+ return (word >> shift) | (word << (32 - shift));
76
}
77
78
/**
79
@@ -XXX,XX +XXX,XX @@ static inline uint32_t ror32(uint32_t word, unsigned int shift)
80
*/
81
static inline uint64_t rol64(uint64_t word, unsigned int shift)
82
{
83
- return (word << shift) | (word >> ((64 - shift) & 63));
84
+ shift &= 63;
85
+ return (word << shift) | (word >> (64 - shift));
86
}
87
88
/**
89
@@ -XXX,XX +XXX,XX @@ static inline uint64_t rol64(uint64_t word, unsigned int shift)
90
*/
91
static inline uint64_t ror64(uint64_t word, unsigned int shift)
92
{
93
- return (word >> shift) | (word << ((64 - shift) & 63));
94
+ shift &= 63;
95
+ return (word >> shift) | (word << (64 - shift));
96
}
97
98
/**
241
--
99
--
242
2.20.1
100
2.34.1
243
244
Deleted patch

From: Carlos Santos <casantos@redhat.com>

uClibc defines _SC_LEVEL1_ICACHE_LINESIZE and _SC_LEVEL1_DCACHE_LINESIZE
but the corresponding sysconf calls return -1, which is a valid result,
meaning that the limit is indeterminate.

Handle this situation using the fallback values instead of crashing due
to an assertion failure.

Signed-off-by: Carlos Santos <casantos@redhat.com>
Message-Id: <20191017123713.30192-1-casantos@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 util/cacheinfo.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/util/cacheinfo.c b/util/cacheinfo.c
index XXXXXXX..XXXXXXX 100644
--- a/util/cacheinfo.c
+++ b/util/cacheinfo.c
@@ -XXX,XX +XXX,XX @@ static void sys_cache_info(int *isize, int *dsize)
 static void sys_cache_info(int *isize, int *dsize)
 {
 # ifdef _SC_LEVEL1_ICACHE_LINESIZE
-    *isize = sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
+    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
+    if (tmp_isize > 0) {
+        *isize = tmp_isize;
+    }
 # endif
 # ifdef _SC_LEVEL1_DCACHE_LINESIZE
-    *dsize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+    if (tmp_dsize > 0) {
+        *dsize = tmp_dsize;
+    }
 # endif
 }
 #endif /* sys_cache_info */
--
2.20.1
Deleted patch

The accel_initialised variable no longer has any setters.

Fixes: 6f6e1698a68c
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 vl.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vl.c b/vl.c
index XXXXXXX..XXXXXXX 100644
--- a/vl.c
+++ b/vl.c
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
 {
     const char *accel;
     char **accel_list, **tmp;
-    bool accel_initialised = false;
     bool init_failed = false;

     qemu_opts_foreach(qemu_find_opts("icount"),
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)

     accel_list = g_strsplit(accel, ":", 0);

-    for (tmp = accel_list; !accel_initialised && tmp && *tmp; tmp++) {
+    for (tmp = accel_list; tmp && *tmp; tmp++) {
         /*
          * Filter invalid accelerators here, to prevent obscenities
          * such as "-machine accel=tcg,,thread=single".
--
2.20.1
Deleted patch

The accel_list and tmp variables are only used when manufacturing
-machine accel, options based on -accel.

Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 vl.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vl.c b/vl.c
index XXXXXXX..XXXXXXX 100644
--- a/vl.c
+++ b/vl.c
@@ -XXX,XX +XXX,XX @@ static int do_configure_accelerator(void *opaque, QemuOpts *opts, Error **errp)
 static void configure_accelerators(const char *progname)
 {
     const char *accel;
-    char **accel_list, **tmp;
     bool init_failed = false;

     qemu_opts_foreach(qemu_find_opts("icount"),
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)

     accel = qemu_opt_get(qemu_get_machine_opts(), "accel");
     if (QTAILQ_EMPTY(&qemu_accel_opts.head)) {
+        char **accel_list, **tmp;
+
         if (accel == NULL) {
             /* Select the default accelerator */
             if (!accel_find("tcg") && !accel_find("kvm")) {
--
2.20.1
Deleted patch

The result of g_strsplit is never NULL.

Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 vl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vl.c b/vl.c
index XXXXXXX..XXXXXXX 100644
--- a/vl.c
+++ b/vl.c
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)

     accel_list = g_strsplit(accel, ":", 0);

-    for (tmp = accel_list; tmp && *tmp; tmp++) {
+    for (tmp = accel_list; *tmp; tmp++) {
         /*
          * Filter invalid accelerators here, to prevent obscenities
          * such as "-machine accel=tcg,,thread=single".
--
2.20.1
Old series patch:

From: Philippe Mathieu-Daudé <philmd@redhat.com>

To avoid scrolling each instruction when reviewing tcg
helpers written for the decodetree script, display the
.decode files (similar to header declarations) before
the C source (implementation of previous declarations).

Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20191230082856.30556-1-philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 scripts/git.orderfile | 3 +++
 1 file changed, 3 insertions(+)

New series patch:

From: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>

This is for use in the RISC-V vclz and vctz instructions (implemented in
a following commit).

Signed-off-by: Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230428144757.57530-11-lawrence.hunter@codethink.co.uk>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/qemu/host-utils.h | 54 +++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
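A small standalone check of the width adjustment mentioned in the new
comments; clz8 here is copied from the patch below purely for illustration.

    #include <stdint.h>
    #include <stdio.h>

    /* __builtin_clz() counts leading zeroes of the promoted 32-bit value,
     * so 24 is subtracted to get the count for the 8-bit width. */
    static int clz8(uint8_t val)
    {
        return val ? __builtin_clz(val) - 24 : 8;
    }

    int main(void)
    {
        printf("%d\n", clz8(0x10));  /* 3: 0001_0000 has 3 leading zeroes */
        printf("%d\n", clz8(0));     /* 8: the zero case is well defined here */
        return 0;
    }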
diff --git a/scripts/git.orderfile b/scripts/git.orderfile
14
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
18
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
19
--- a/scripts/git.orderfile
16
--- a/include/qemu/host-utils.h
20
+++ b/scripts/git.orderfile
17
+++ b/include/qemu/host-utils.h
21
@@ -XXX,XX +XXX,XX @@ qga/*.json
18
@@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
22
# headers
19
}
23
*.h
20
#endif
24
21
25
+# decoding tree specification
22
+/**
26
+*.decode
23
+ * clz8 - count leading zeros in a 8-bit value.
24
+ * @val: The value to search
25
+ *
26
+ * Returns 8 if the value is zero. Note that the GCC builtin is
27
+ * undefined if the value is zero.
28
+ *
29
+ * Note that the GCC builtin will upcast its argument to an `unsigned int`
30
+ * so this function subtracts off the number of prepended zeroes.
31
+ */
32
+static inline int clz8(uint8_t val)
33
+{
34
+ return val ? __builtin_clz(val) - 24 : 8;
35
+}
27
+
36
+
28
# code
37
+/**
29
*.c
38
+ * clz16 - count leading zeros in a 16-bit value.
39
+ * @val: The value to search
40
+ *
41
+ * Returns 16 if the value is zero. Note that the GCC builtin is
42
+ * undefined if the value is zero.
43
+ *
44
+ * Note that the GCC builtin will upcast its argument to an `unsigned int`
45
+ * so this function subtracts off the number of prepended zeroes.
46
+ */
47
+static inline int clz16(uint16_t val)
48
+{
49
+ return val ? __builtin_clz(val) - 16 : 16;
50
+}
51
+
52
/**
53
* clz32 - count leading zeros in a 32-bit value.
54
* @val: The value to search
55
@@ -XXX,XX +XXX,XX @@ static inline int clo64(uint64_t val)
56
return clz64(~val);
57
}
58
59
+/**
60
+ * ctz8 - count trailing zeros in a 8-bit value.
61
+ * @val: The value to search
62
+ *
63
+ * Returns 8 if the value is zero. Note that the GCC builtin is
64
+ * undefined if the value is zero.
65
+ */
66
+static inline int ctz8(uint8_t val)
67
+{
68
+ return val ? __builtin_ctz(val) : 8;
69
+}
70
+
71
+/**
72
+ * ctz16 - count trailing zeros in a 16-bit value.
73
+ * @val: The value to search
74
+ *
75
+ * Returns 16 if the value is zero. Note that the GCC builtin is
76
+ * undefined if the value is zero.
77
+ */
78
+static inline int ctz16(uint16_t val)
79
+{
80
+ return val ? __builtin_ctz(val) : 16;
81
+}
82
+
83
/**
84
* ctz32 - count trailing zeros in a 32-bit value.
85
* @val: The value to search
30
--
86
--
31
2.20.1
87
2.34.1
32
33
Old series patch:

There is only one caller for tlb_table_flush_by_mmuidx. Place
the result at the earlier line number, due to an expected user
in the near future.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

New series patch:

From: Nazar Kazakov <nazar.kazakov@codethink.co.uk>

Add tcg expander and helper functions for and-complement
vector with scalar operand.

Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
Message-Id: <20230428144757.57530-10-lawrence.hunter@codethink.co.uk>
[rth: Split out of larger patch.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-runtime.h      |  1 +
 include/tcg/tcg-op-gvec.h    |  2 ++
 accel/tcg/tcg-runtime-gvec.c | 11 +++++++++++
 tcg/tcg-op-gvec.c            | 17 +++++++++++++++++
 4 files changed, 31 insertions(+)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
17
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
13
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
19
--- a/accel/tcg/tcg-runtime.h
15
+++ b/accel/tcg/cputlb.c
20
+++ b/accel/tcg/tcg-runtime.h
16
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
21
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_4(gvec_nor, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
17
}
22
DEF_HELPER_FLAGS_4(gvec_eqv, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
23
24
DEF_HELPER_FLAGS_4(gvec_ands, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
25
+DEF_HELPER_FLAGS_4(gvec_andcs, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
26
DEF_HELPER_FLAGS_4(gvec_xors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
27
DEF_HELPER_FLAGS_4(gvec_ors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
28
29
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
30
index XXXXXXX..XXXXXXX 100644
31
--- a/include/tcg/tcg-op-gvec.h
32
+++ b/include/tcg/tcg-op-gvec.h
33
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_ori(unsigned vece, uint32_t dofs, uint32_t aofs,
34
35
void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs,
36
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
37
+void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs,
38
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
39
void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs,
40
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
41
void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,
42
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/accel/tcg/tcg-runtime-gvec.c
45
+++ b/accel/tcg/tcg-runtime-gvec.c
46
@@ -XXX,XX +XXX,XX @@ void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
47
clear_high(d, oprsz, desc);
18
}
48
}
19
49
20
-static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx)
50
+void HELPER(gvec_andcs)(void *d, void *a, uint64_t b, uint32_t desc)
21
+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
51
+{
52
+ intptr_t oprsz = simd_oprsz(desc);
53
+ intptr_t i;
54
+
55
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
56
+ *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & ~b;
57
+ }
58
+ clear_high(d, oprsz, desc);
59
+}
60
+
61
void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
22
{
62
{
23
tlb_mmu_resize_locked(env, mmu_idx);
63
intptr_t oprsz = simd_oprsz(desc);
24
- memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
64
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
25
env_tlb(env)->d[mmu_idx].n_used_entries = 0;
65
index XXXXXXX..XXXXXXX 100644
26
+ env_tlb(env)->d[mmu_idx].large_page_addr = -1;
66
--- a/tcg/tcg-op-gvec.c
27
+ env_tlb(env)->d[mmu_idx].large_page_mask = -1;
67
+++ b/tcg/tcg-op-gvec.c
28
+ env_tlb(env)->d[mmu_idx].vindex = 0;
68
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_andi(unsigned vece, uint32_t dofs, uint32_t aofs,
29
+ memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
69
tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ands);
30
+ memset(env_tlb(env)->d[mmu_idx].vtable, -1,
31
+ sizeof(env_tlb(env)->d[0].vtable));
32
}
70
}
33
71
34
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
72
+void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs,
35
@@ -XXX,XX +XXX,XX @@ void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
73
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
36
*pelide = elide;
74
+{
37
}
75
+ static GVecGen2s g = {
38
76
+ .fni8 = tcg_gen_andc_i64,
39
-static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
77
+ .fniv = tcg_gen_andc_vec,
40
-{
78
+ .fno = gen_helper_gvec_andcs,
41
- tlb_table_flush_by_mmuidx(env, mmu_idx);
79
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
42
- env_tlb(env)->d[mmu_idx].large_page_addr = -1;
80
+ .vece = MO_64
43
- env_tlb(env)->d[mmu_idx].large_page_mask = -1;
81
+ };
44
- env_tlb(env)->d[mmu_idx].vindex = 0;
82
+
45
- memset(env_tlb(env)->d[mmu_idx].vtable, -1,
83
+ TCGv_i64 tmp = tcg_temp_ebb_new_i64();
46
- sizeof(env_tlb(env)->d[0].vtable));
84
+ tcg_gen_dup_i64(vece, tmp, c);
47
-}
85
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &g);
48
-
86
+ tcg_temp_free_i64(tmp);
49
static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
87
+}
50
{
88
+
51
CPUArchState *env = cpu->env_ptr;
89
static const GVecGen2s gop_xors = {
90
.fni8 = tcg_gen_xor_i64,
91
.fniv = tcg_gen_xor_vec,
52
--
92
--
53
2.20.1
93
2.34.1
54
55
Old series patch:

We do not need the entire CPUArchState to compute these values.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 15 ++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

New series patch:

From: Nazar Kazakov <nazar.kazakov@codethink.co.uk>

Add tcg expander and helper functions for rotate right
vector with scalar operand.

Signed-off-by: Nazar Kazakov <nazar.kazakov@codethink.co.uk>
Message-Id: <20230428144757.57530-10-lawrence.hunter@codethink.co.uk>
[rth: Split out of larger patch; mask rotation count.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-op-gvec.h |  2 ++
 tcg/tcg-op-gvec.c         | 11 +++++++++++
 2 files changed, 13 insertions(+)
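The expander below leans on the identity that a right rotate by some count
equals a left rotate by the negated count modulo the element width. A
standalone sketch with made-up 32-bit values, independent of the TCG code:

    #include <stdint.h>
    #include <stdio.h>

    /* Plain 32-bit rotate-left helper for the illustration. */
    static uint32_t rotl32(uint32_t x, unsigned n)
    {
        n &= 31;
        return n ? (x << n) | (x >> (32 - n)) : x;
    }

    int main(void)
    {
        uint32_t x = 0x12345678u;
        unsigned shift = 5;

        /* rotr(x, 5) == rotl(x, (-5) & 31) == rotl(x, 27). */
        uint32_t by_rotl = rotl32(x, (0u - shift) & 31);
        uint32_t by_rotr = (x >> shift) | (x << (32 - shift));

        printf("%08x %08x\n", by_rotl, by_rotr);  /* both c091a2b3 */
        return 0;
    }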
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
15
diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h
12
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/cputlb.c
17
--- a/include/tcg/tcg-op-gvec.h
14
+++ b/accel/tcg/cputlb.c
18
+++ b/include/tcg/tcg-op-gvec.h
15
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
19
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs,
16
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
20
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
17
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
21
void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
18
22
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
19
-static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
23
+void tcg_gen_gvec_rotrs(unsigned vece, uint32_t dofs, uint32_t aofs,
20
+static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
24
+ TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
21
{
25
22
- return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
26
/*
23
+ return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
27
* Perform vector shift by vector element, modulo the element size.
28
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/tcg/tcg-op-gvec.c
31
+++ b/tcg/tcg-op-gvec.c
32
@@ -XXX,XX +XXX,XX @@ void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
33
do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
24
}
34
}
25
35
26
-static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
36
+void tcg_gen_gvec_rotrs(unsigned vece, uint32_t dofs, uint32_t aofs,
27
+static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
37
+ TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz)
28
{
38
+{
29
- return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
39
+ TCGv_i32 tmp = tcg_temp_ebb_new_i32();
30
+ return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
40
+
31
}
41
+ tcg_gen_neg_i32(tmp, shift);
32
42
+ tcg_gen_andi_i32(tmp, tmp, (8 << vece) - 1);
33
static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
43
+ tcg_gen_gvec_rotls(vece, dofs, aofs, tmp, oprsz, maxsz);
34
@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
44
+ tcg_temp_free_i32(tmp);
35
static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
45
+}
36
{
46
+
37
CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
47
/*
38
- size_t old_size = tlb_n_entries(env, mmu_idx);
48
* Expand D = A << (B % element bits)
39
+ size_t old_size = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
49
*
40
size_t rate;
41
size_t new_size = old_size;
42
int64_t now = get_clock_realtime();
43
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
44
env_tlb(env)->d[mmu_idx].large_page_addr = -1;
45
env_tlb(env)->d[mmu_idx].large_page_mask = -1;
46
env_tlb(env)->d[mmu_idx].vindex = 0;
47
- memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
48
+ memset(env_tlb(env)->f[mmu_idx].table, -1,
49
+ sizeof_tlb(&env_tlb(env)->f[mmu_idx]));
50
memset(env_tlb(env)->d[mmu_idx].vtable, -1,
51
sizeof(env_tlb(env)->d[0].vtable));
52
}
53
@@ -XXX,XX +XXX,XX @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
54
qemu_spin_lock(&env_tlb(env)->c.lock);
55
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
56
unsigned int i;
57
- unsigned int n = tlb_n_entries(env, mmu_idx);
58
+ unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
59
60
for (i = 0; i < n; i++) {
61
tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
62
--
50
--
63
2.20.1
51
2.34.1
64
65
Old series patch:

By choosing "tcg:kvm" when kvm is not enabled, we generate
an incorrect warning: "invalid accelerator kvm".

At the same time, use g_str_has_suffix rather than open-coding
the same operation.

Presumably the inverse is also true with --disable-tcg.

Fixes: 28a0961757fc
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed by: Aleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 vl.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

New series patch:

Clang 14, with --enable-tcg-interpreter errors with

include/qemu/int128.h:487:16: error: alignment of field 'i' (128 bits)
does not match the alignment of the first field in transparent union;
transparent_union attribute ignored [-Werror,-Wignored-attributes]
        __int128_t i;
                   ^
include/qemu/int128.h:486:12: note: alignment of first field is 64 bits
        Int128 s;
               ^
1 error generated.

By placing the __uint128_t member first, this is avoided.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20230501204625.277361-1-richard.henderson@linaro.org>
---
 include/qemu/int128.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/vl.c b/vl.c
22
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
19
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
20
--- a/vl.c
24
--- a/include/qemu/int128.h
21
+++ b/vl.c
25
+++ b/include/qemu/int128.h
22
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
26
@@ -XXX,XX +XXX,XX @@ static inline void bswap128s(Int128 *s)
23
27
*/
24
if (accel == NULL) {
28
#ifdef CONFIG_INT128
25
/* Select the default accelerator */
29
typedef union {
26
- if (!accel_find("tcg") && !accel_find("kvm")) {
30
- Int128 s;
27
- error_report("No accelerator selected and"
31
- __int128_t i;
28
- " no default accelerator available");
32
__uint128_t u;
29
- exit(1);
33
+ __int128_t i;
30
- } else {
34
+ Int128 s;
31
- int pnlen = strlen(progname);
35
} Int128Alias __attribute__((transparent_union));
32
- if (pnlen >= 3 && g_str_equal(&progname[pnlen - 3], "kvm")) {
36
#else
33
+ bool have_tcg = accel_find("tcg");
37
typedef Int128 Int128Alias;
34
+ bool have_kvm = accel_find("kvm");
35
+
36
+ if (have_tcg && have_kvm) {
37
+ if (g_str_has_suffix(progname, "kvm")) {
38
/* If the program name ends with "kvm", we prefer KVM */
39
accel = "kvm:tcg";
40
} else {
41
accel = "tcg:kvm";
42
}
43
+ } else if (have_kvm) {
44
+ accel = "kvm";
45
+ } else if (have_tcg) {
46
+ accel = "tcg";
47
+ } else {
48
+ error_report("No accelerator selected and"
49
+ " no default accelerator available");
50
+ exit(1);
51
}
52
}
53
-
54
accel_list = g_strsplit(accel, ":", 0);
55
56
for (tmp = accel_list; *tmp; tmp++) {
57
--
38
--
58
2.20.1
39
2.34.1
59
40
60
41
Old series patch:

There's little point in leaving these data structures half initialized,
and relying on a flush to be done during reset.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

New series patch:

Use the attribute, which is supported by clang, instead of
the #pragma, which is not supported and, for some reason,
also not detected by the meson probe, so we fail by -Werror.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20230501210555.289806-1-richard.henderson@linaro.org>
---
 meson.build        | 5 +----
 migration/xbzrle.c | 9 ++++-----
 2 files changed, 5 insertions(+), 9 deletions(-)
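For reference, the per-function form the meson probe now tests looks like
the snippet below; the function name is made up, and only the attribute
spelling and the intrinsics mirror the probe in the patch. It needs an
x86-64 compiler with AVX-512 support.

    #include <immintrin.h>

    /* Only this one function is compiled with AVX512BW enabled, so no
     * push_options/pop_options pragma pair is needed around it. */
    static int __attribute__((target("avx512bw"))) abs_first_lane(void *a)
    {
        __m512i *x = a;
        __m512i res = _mm512_abs_epi8(*x);
        return res[1];
    }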
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
13
diff --git a/meson.build b/meson.build
12
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/cputlb.c
15
--- a/meson.build
14
+++ b/accel/tcg/cputlb.c
16
+++ b/meson.build
15
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
17
@@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \
16
fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
18
config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \
17
fast->table = g_new(CPUTLBEntry, n_entries);
19
.require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512BW') \
18
desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
20
.require(cc.links('''
19
+ tlb_mmu_flush_locked(desc, fast);
21
- #pragma GCC push_options
22
- #pragma GCC target("avx512bw")
23
#include <cpuid.h>
24
#include <immintrin.h>
25
- static int bar(void *a) {
26
-
27
+ static int __attribute__((target("avx512bw"))) bar(void *a) {
28
__m512i *x = a;
29
__m512i res= _mm512_abs_epi8(*x);
30
return res[1];
31
diff --git a/migration/xbzrle.c b/migration/xbzrle.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/migration/xbzrle.c
34
+++ b/migration/xbzrle.c
35
@@ -XXX,XX +XXX,XX @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
20
}
36
}
21
37
22
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
38
#if defined(CONFIG_AVX512BW_OPT)
23
@@ -XXX,XX +XXX,XX @@ void tlb_init(CPUState *cpu)
39
-#pragma GCC push_options
24
40
-#pragma GCC target("avx512bw")
25
qemu_spin_init(&env_tlb(env)->c.lock);
41
#include <immintrin.h>
26
42
-int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
27
- /* Ensure that cpu_reset performs a full flush. */
43
- uint8_t *dst, int dlen)
28
- env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
44
+
29
+ /* All tlbs are initialized flushed. */
45
+int __attribute__((target("avx512bw")))
30
+ env_tlb(env)->c.dirty = 0;
46
+xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
31
47
+ uint8_t *dst, int dlen)
32
for (i = 0; i < NB_MMU_MODES; i++) {
48
{
33
tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
49
uint32_t zrun_len = 0, nzrun_len = 0;
50
int d = 0, i = 0, num = 0;
51
@@ -XXX,XX +XXX,XX @@ int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
52
}
53
return d;
54
}
55
-#pragma GCC pop_options
56
#endif
34
--
57
--
35
2.20.1
58
2.34.1
36
37
Old series patch:

There are no users of this function outside cputlb.c,
and its interface will change in the next patch.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/cpu_ldst.h | 5 -----
 accel/tcg/cputlb.c      | 5 +++++
 2 files changed, 5 insertions(+), 5 deletions(-)

New series patch:

At least RISC-V has the need to be able to perform a read
using execute permissions, outside of translation.
Add helpers to facilitate this.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Tested-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Message-Id: <20230325105429.1142530-9-richard.henderson@linaro.org>
Message-Id: <20230412114333.118895-9-richard.henderson@linaro.org>
---
 include/exec/cpu_ldst.h | 9 +++++++
 accel/tcg/cputlb.c      | 48 ++++++++++++++++++++++++++++++
 accel/tcg/user-exec.c   | 58 +++++++++++++++++++++++++++++++++
 3 files changed, 115 insertions(+)
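A hedged sketch of how a target might call the new interface; everything
except cpu_ldl_code_mmu() and make_memop_idx(), whose signatures come from
the patch below, is hypothetical.

    /* Hypothetical helper living in target code, with the usual QEMU
     * includes in scope: fetch one 32-bit instruction with execute
     * permission, outside of translation. */
    static uint32_t fetch_insn_for_trap(CPUArchState *env, target_ulong pc,
                                        int mmu_idx, uintptr_t ra)
    {
        MemOpIdx oi = make_memop_idx(MO_TEUL, mmu_idx);
        return cpu_ldl_code_mmu(env, pc, oi, ra);
    }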
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
17
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
14
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
15
--- a/include/exec/cpu_ldst.h
19
--- a/include/exec/cpu_ldst.h
16
+++ b/include/exec/cpu_ldst.h
20
+++ b/include/exec/cpu_ldst.h
17
@@ -XXX,XX +XXX,XX @@ static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
21
@@ -XXX,XX +XXX,XX @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
18
return (addr >> TARGET_PAGE_BITS) & size_mask;
22
# define cpu_stq_mmu cpu_stq_le_mmu
19
}
23
#endif
20
24
21
-static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
25
+uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
22
-{
26
+ MemOpIdx oi, uintptr_t ra);
23
- return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
27
+uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
24
-}
28
+ MemOpIdx oi, uintptr_t ra);
25
-
29
+uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
26
/* Find the TLB entry corresponding to the mmu_idx + address pair. */
30
+ MemOpIdx oi, uintptr_t ra);
27
static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
31
+uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
28
target_ulong addr)
32
+ MemOpIdx oi, uintptr_t ra);
33
+
34
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr);
35
uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr);
36
uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr);
29
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
37
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
30
index XXXXXXX..XXXXXXX 100644
38
index XXXXXXX..XXXXXXX 100644
31
--- a/accel/tcg/cputlb.c
39
--- a/accel/tcg/cputlb.c
32
+++ b/accel/tcg/cputlb.c
40
+++ b/accel/tcg/cputlb.c
33
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
41
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
34
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
42
MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true));
35
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
43
return full_ldq_code(env, addr, oi, 0);
36
44
}
37
+static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
45
+
46
+uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
47
+ MemOpIdx oi, uintptr_t retaddr)
38
+{
48
+{
39
+ return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
49
+ return full_ldub_code(env, addr, oi, retaddr);
40
+}
50
+}
41
+
51
+
42
static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
52
+uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
43
{
53
+ MemOpIdx oi, uintptr_t retaddr)
44
return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
54
+{
55
+ MemOp mop = get_memop(oi);
56
+ int idx = get_mmuidx(oi);
57
+ uint16_t ret;
58
+
59
+ ret = full_lduw_code(env, addr, make_memop_idx(MO_TEUW, idx), retaddr);
60
+ if ((mop & MO_BSWAP) != MO_TE) {
61
+ ret = bswap16(ret);
62
+ }
63
+ return ret;
64
+}
65
+
66
+uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
67
+ MemOpIdx oi, uintptr_t retaddr)
68
+{
69
+ MemOp mop = get_memop(oi);
70
+ int idx = get_mmuidx(oi);
71
+ uint32_t ret;
72
+
73
+ ret = full_ldl_code(env, addr, make_memop_idx(MO_TEUL, idx), retaddr);
74
+ if ((mop & MO_BSWAP) != MO_TE) {
75
+ ret = bswap32(ret);
76
+ }
77
+ return ret;
78
+}
79
+
80
+uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
81
+ MemOpIdx oi, uintptr_t retaddr)
82
+{
83
+ MemOp mop = get_memop(oi);
84
+ int idx = get_mmuidx(oi);
85
+ uint64_t ret;
86
+
87
+ ret = full_ldq_code(env, addr, make_memop_idx(MO_TEUQ, idx), retaddr);
88
+ if ((mop & MO_BSWAP) != MO_TE) {
89
+ ret = bswap64(ret);
90
+ }
91
+ return ret;
92
+}
93
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
94
index XXXXXXX..XXXXXXX 100644
95
--- a/accel/tcg/user-exec.c
96
+++ b/accel/tcg/user-exec.c
97
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr)
98
return ret;
99
}
100
101
+uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
102
+ MemOpIdx oi, uintptr_t ra)
103
+{
104
+ void *haddr;
105
+ uint8_t ret;
106
+
107
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
108
+ ret = ldub_p(haddr);
109
+ clear_helper_retaddr();
110
+ return ret;
111
+}
112
+
113
+uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
114
+ MemOpIdx oi, uintptr_t ra)
115
+{
116
+ void *haddr;
117
+ uint16_t ret;
118
+
119
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
120
+ ret = lduw_p(haddr);
121
+ clear_helper_retaddr();
122
+ if (get_memop(oi) & MO_BSWAP) {
123
+ ret = bswap16(ret);
124
+ }
125
+ return ret;
126
+}
127
+
128
+uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
129
+ MemOpIdx oi, uintptr_t ra)
130
+{
131
+ void *haddr;
132
+ uint32_t ret;
133
+
134
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
135
+ ret = ldl_p(haddr);
136
+ clear_helper_retaddr();
137
+ if (get_memop(oi) & MO_BSWAP) {
138
+ ret = bswap32(ret);
139
+ }
140
+ return ret;
141
+}
142
+
143
+uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
144
+ MemOpIdx oi, uintptr_t ra)
145
+{
146
+ void *haddr;
147
+ uint64_t ret;
148
+
149
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
151
+ ret = ldq_p(haddr);
152
+ clear_helper_retaddr();
153
+ if (get_memop(oi) & MO_BSWAP) {
154
+ ret = bswap64(ret);
155
+ }
156
+ return ret;
157
+}
158
+
159
#include "ldst_common.c.inc"
160
161
/*
45
--
162
--
46
2.20.1
163
2.34.1
47
48
diff view generated by jsdifflib
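For illustration only, a minimal sketch of how a target front end could use the new cpu_ld*_code_mmu entry points, mirroring the make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true)) pattern used by cpu_ldq_code above; the helper name fetch_insn32 is hypothetical and not part of this series:

/*
 * Hypothetical helper inside a target translator (cpu.h and
 * "exec/cpu_ldst.h" are already included there): fetch one 32-bit
 * instruction word with an explicit MemOpIdx and unwind address,
 * rather than through the fixed cpu_ldl_code() wrapper.
 */
static uint32_t fetch_insn32(CPUArchState *env, abi_ptr pc, uintptr_t ra)
{
    MemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));

    return cpu_ldl_code_mmu(env, pc, oi, ra);
}
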
1
Merge into the only caller, but at the same time split
1
Since TCG_TYPE_I32 values are kept sign-extended in registers,
2
out tlb_mmu_init to initialize a single tlb entry.
2
via ".w" instructions, we need not extend if the register matches.
3
This is already relied upon by comparisons.
3
4
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
accel/tcg/cputlb.c | 33 ++++++++++++++++-----------------
8
tcg/loongarch64/tcg-target.c.inc | 4 +++-
10
1 file changed, 16 insertions(+), 17 deletions(-)
9
1 file changed, 3 insertions(+), 1 deletion(-)
11
10
12
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
11
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
13
--- a/tcg/loongarch64/tcg-target.c.inc
15
+++ b/accel/tcg/cputlb.c
14
+++ b/tcg/loongarch64/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg)
17
desc->window_max_entries = max_entries;
16
18
}
17
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
19
20
-static void tlb_dyn_init(CPUArchState *env)
21
-{
22
- int i;
23
-
24
- for (i = 0; i < NB_MMU_MODES; i++) {
25
- CPUTLBDesc *desc = &env_tlb(env)->d[i];
26
- size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
27
-
28
- tlb_window_reset(desc, get_clock_realtime(), 0);
29
- desc->n_used_entries = 0;
30
- env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
31
- env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries);
32
- env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries);
33
- }
34
-}
35
-
36
/**
37
* tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
38
* @desc: The CPUTLBDesc portion of the TLB
39
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
40
tlb_mmu_flush_locked(desc, fast);
41
}
42
43
+static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
44
+{
45
+ size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
46
+
47
+ tlb_window_reset(desc, now, 0);
48
+ desc->n_used_entries = 0;
49
+ fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
50
+ fast->table = g_new(CPUTLBEntry, n_entries);
51
+ desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
52
+}
53
+
54
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
55
{
18
{
56
env_tlb(env)->d[mmu_idx].n_used_entries++;
19
- tcg_out_ext32s(s, ret, arg);
57
@@ -XXX,XX +XXX,XX @@ static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
20
+ if (ret != arg) {
58
void tlb_init(CPUState *cpu)
21
+ tcg_out_ext32s(s, ret, arg);
59
{
60
CPUArchState *env = cpu->env_ptr;
61
+ int64_t now = get_clock_realtime();
62
+ int i;
63
64
qemu_spin_init(&env_tlb(env)->c.lock);
65
66
/* Ensure that cpu_reset performs a full flush. */
67
env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
68
69
- tlb_dyn_init(env);
70
+ for (i = 0; i < NB_MMU_MODES; i++) {
71
+ tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
72
+ }
22
+ }
73
}
23
}
74
24
75
/* flush_all_helper: run fn across all cpus
25
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
76
--
26
--
77
2.20.1
27
2.34.1
78
28
79
29
diff view generated by jsdifflib
1
No functional change, but the smaller expressions make
1
Since TCG_TYPE_I32 values are kept sign-extended in registers, we need not
2
the code easier to read.
2
extend if the register matches. This is already relied upon by comparisons.
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
6
---
9
accel/tcg/cputlb.c | 19 ++++++++++---------
7
tcg/mips/tcg-target.c.inc | 4 +++-
10
1 file changed, 10 insertions(+), 9 deletions(-)
8
1 file changed, 3 insertions(+), 1 deletion(-)
11
9
12
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
10
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
12
--- a/tcg/mips/tcg-target.c.inc
15
+++ b/accel/tcg/cputlb.c
13
+++ b/tcg/mips/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
14
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rs)
17
15
18
static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
16
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs)
19
{
17
{
20
- tlb_mmu_resize_locked(&env_tlb(env)->d[mmu_idx], &env_tlb(env)->f[mmu_idx]);
18
- tcg_out_ext32s(s, rd, rs);
21
- env_tlb(env)->d[mmu_idx].n_used_entries = 0;
19
+ if (rd != rs) {
22
- env_tlb(env)->d[mmu_idx].large_page_addr = -1;
20
+ tcg_out_ext32s(s, rd, rs);
23
- env_tlb(env)->d[mmu_idx].large_page_mask = -1;
21
+ }
24
- env_tlb(env)->d[mmu_idx].vindex = 0;
25
- memset(env_tlb(env)->f[mmu_idx].table, -1,
26
- sizeof_tlb(&env_tlb(env)->f[mmu_idx]));
27
- memset(env_tlb(env)->d[mmu_idx].vtable, -1,
28
- sizeof(env_tlb(env)->d[0].vtable));
29
+ CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
30
+ CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
31
+
32
+ tlb_mmu_resize_locked(desc, fast);
33
+ desc->n_used_entries = 0;
34
+ desc->large_page_addr = -1;
35
+ desc->large_page_mask = -1;
36
+ desc->vindex = 0;
37
+ memset(fast->table, -1, sizeof_tlb(fast));
38
+ memset(desc->vtable, -1, sizeof(desc->vtable));
39
}
22
}
40
23
41
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
24
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs)
42
--
25
--
43
2.20.1
26
2.34.1
44
27
45
28
diff view generated by jsdifflib
1
We will want to be able to flush a tlb without resizing.
1
This is common code in most qemu_{ld,st} slow paths, moving two
2
2
registers when there may be overlap between sources and destinations.
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
At present, this is only used by 32-bit hosts for 64-bit data,
4
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
4
but will shortly be used for more than that.
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
5
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
---
8
accel/tcg/cputlb.c | 15 ++++++++++-----
9
tcg/tcg.c | 69 ++++++++++++++++++++++++++++++++++++---
9
1 file changed, 10 insertions(+), 5 deletions(-)
10
tcg/arm/tcg-target.c.inc | 44 ++++++++++---------------
10
11
tcg/i386/tcg-target.c.inc | 19 +++++------
11
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
12
3 files changed, 90 insertions(+), 42 deletions(-)
13
14
diff --git a/tcg/tcg.c b/tcg/tcg.c
12
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/cputlb.c
16
--- a/tcg/tcg.c
14
+++ b/accel/tcg/cputlb.c
17
+++ b/tcg/tcg.c
15
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
18
@@ -XXX,XX +XXX,XX @@ static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
16
}
19
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
20
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
21
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
22
-static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
23
- __attribute__((unused));
24
+static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
25
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
26
static void tcg_out_goto_tb(TCGContext *s, int which);
27
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
28
@@ -XXX,XX +XXX,XX @@ void tcg_raise_tb_overflow(TCGContext *s)
29
siglongjmp(s->jmp_trans, -2);
17
}
30
}
18
31
19
-static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
32
+typedef struct TCGMovExtend {
20
+static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
33
+ TCGReg dst;
34
+ TCGReg src;
35
+ TCGType dst_type;
36
+ TCGType src_type;
37
+ MemOp src_ext;
38
+} TCGMovExtend;
39
+
40
/**
41
* tcg_out_movext -- move and extend
42
* @s: tcg context
43
@@ -XXX,XX +XXX,XX @@ void tcg_raise_tb_overflow(TCGContext *s)
44
*
45
* Move or extend @src into @dst, depending on @src_ext and the types.
46
*/
47
-static void __attribute__((unused))
48
-tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
49
- TCGType src_type, MemOp src_ext, TCGReg src)
50
+static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
51
+ TCGType src_type, MemOp src_ext, TCGReg src)
21
{
52
{
22
- CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
53
switch (src_ext) {
23
- CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
54
case MO_UB:
24
-
55
@@ -XXX,XX +XXX,XX @@ tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
25
- tlb_mmu_resize_locked(desc, fast);
56
}
26
desc->n_used_entries = 0;
27
desc->large_page_addr = -1;
28
desc->large_page_mask = -1;
29
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
30
memset(desc->vtable, -1, sizeof(desc->vtable));
31
}
57
}
32
58
33
+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
59
+/* Minor variations on a theme, using a structure. */
60
+static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
61
+ TCGReg src)
34
+{
62
+{
35
+ CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
63
+ tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
36
+ CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
37
+
38
+ tlb_mmu_resize_locked(desc, fast);
39
+ tlb_mmu_flush_locked(desc, fast);
40
+}
64
+}
41
+
65
+
42
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
66
+static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
67
+{
68
+ tcg_out_movext1_new_src(s, i, i->src);
69
+}
70
+
71
+/**
72
+ * tcg_out_movext2 -- move and extend two pairs
73
+ * @s: tcg context
74
+ * @i1: first move description
75
+ * @i2: second move description
76
+ * @scratch: temporary register, or -1 for none
77
+ *
78
+ * As tcg_out_movext, for both @i1 and @i2, caring for overlap
79
+ * between the sources and destinations.
80
+ */
81
+
82
+static void __attribute__((unused))
83
+tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
84
+ const TCGMovExtend *i2, int scratch)
85
+{
86
+ TCGReg src1 = i1->src;
87
+ TCGReg src2 = i2->src;
88
+
89
+ if (i1->dst != src2) {
90
+ tcg_out_movext1(s, i1);
91
+ tcg_out_movext1(s, i2);
92
+ return;
93
+ }
94
+ if (i2->dst == src1) {
95
+ TCGType src1_type = i1->src_type;
96
+ TCGType src2_type = i2->src_type;
97
+
98
+ if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
99
+ /* The data is now in the correct registers, now extend. */
100
+ src1 = i2->src;
101
+ src2 = i1->src;
102
+ } else {
103
+ tcg_debug_assert(scratch >= 0);
104
+ tcg_out_mov(s, src1_type, scratch, src1);
105
+ src1 = scratch;
106
+ }
107
+ }
108
+ tcg_out_movext1_new_src(s, i2, src2);
109
+ tcg_out_movext1_new_src(s, i1, src1);
110
+}
111
+
112
#define C_PFX1(P, A) P##A
113
#define C_PFX2(P, A, B) P##A##_##B
114
#define C_PFX3(P, A, B, C) P##A##_##B##_##C
115
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
116
index XXXXXXX..XXXXXXX 100644
117
--- a/tcg/arm/tcg-target.c.inc
118
+++ b/tcg/arm/tcg-target.c.inc
119
@@ -XXX,XX +XXX,XX @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
120
121
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
43
{
122
{
44
env_tlb(env)->d[mmu_idx].n_used_entries++;
123
- TCGReg argreg, datalo, datahi;
124
+ TCGReg argreg;
125
MemOpIdx oi = lb->oi;
126
MemOp opc = get_memop(oi);
127
128
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
129
/* Use the canonical unsigned helpers and minimize icache usage. */
130
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
131
132
- datalo = lb->datalo_reg;
133
- datahi = lb->datahi_reg;
134
if ((opc & MO_SIZE) == MO_64) {
135
- if (datalo != TCG_REG_R1) {
136
- tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
137
- tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
138
- } else if (datahi != TCG_REG_R0) {
139
- tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
140
- tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
141
- } else {
142
- tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
143
- tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
144
- tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
145
- }
146
+ TCGMovExtend ext[2] = {
147
+ { .dst = lb->datalo_reg, .dst_type = TCG_TYPE_I32,
148
+ .src = TCG_REG_R0, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
149
+ { .dst = lb->datahi_reg, .dst_type = TCG_TYPE_I32,
150
+ .src = TCG_REG_R1, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
151
+ };
152
+ tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
153
} else {
154
- tcg_out_movext(s, TCG_TYPE_I32, datalo,
155
+ tcg_out_movext(s, TCG_TYPE_I32, lb->datalo_reg,
156
TCG_TYPE_I32, opc & MO_SSIZE, TCG_REG_R0);
157
}
158
159
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
160
161
if (TARGET_LONG_BITS == 64) {
162
/* 64-bit target address is aligned into R2:R3. */
163
- if (l->addrhi_reg != TCG_REG_R2) {
164
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg);
165
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg);
166
- } else if (l->addrlo_reg != TCG_REG_R3) {
167
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg);
168
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg);
169
- } else {
170
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, TCG_REG_R2);
171
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, TCG_REG_R3);
172
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, TCG_REG_R1);
173
- }
174
+ TCGMovExtend ext[2] = {
175
+ { .dst = TCG_REG_R2, .dst_type = TCG_TYPE_I32,
176
+ .src = l->addrlo_reg,
177
+ .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
178
+ { .dst = TCG_REG_R3, .dst_type = TCG_TYPE_I32,
179
+ .src = l->addrhi_reg,
180
+ .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
181
+ };
182
+ tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
183
} else {
184
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg);
185
}
186
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
187
index XXXXXXX..XXXXXXX 100644
188
--- a/tcg/i386/tcg-target.c.inc
189
+++ b/tcg/i386/tcg-target.c.inc
190
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
191
{
192
MemOpIdx oi = l->oi;
193
MemOp opc = get_memop(oi);
194
- TCGReg data_reg;
195
tcg_insn_unit **label_ptr = &l->label_ptr[0];
196
197
/* resolve label address */
198
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
199
200
tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
201
202
- data_reg = l->datalo_reg;
203
if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
204
- if (data_reg == TCG_REG_EDX) {
205
- /* xchg %edx, %eax */
206
- tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
207
- tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
208
- } else {
209
- tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
210
- tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
211
- }
212
+ TCGMovExtend ext[2] = {
213
+ { .dst = l->datalo_reg, .dst_type = TCG_TYPE_I32,
214
+ .src = TCG_REG_EAX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
215
+ { .dst = l->datahi_reg, .dst_type = TCG_TYPE_I32,
216
+ .src = TCG_REG_EDX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
217
+ };
218
+ tcg_out_movext2(s, &ext[0], &ext[1], -1);
219
} else {
220
- tcg_out_movext(s, l->type, data_reg,
221
+ tcg_out_movext(s, l->type, l->datalo_reg,
222
TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_EAX);
223
}
224
45
--
225
--
46
2.20.1
226
2.34.1
47
227
48
228
diff view generated by jsdifflib
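For illustration only, a small self-contained sketch (plain C, not TCG code) of the register-overlap ordering that tcg_out_movext2 implements, with the extension step omitted; emit_mov, emit_xchg and mov2 are invented stand-ins:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

typedef int Reg;

/* Stand-ins for a backend's real move/exchange emitters. */
static void emit_mov(Reg dst, Reg src) { printf("mov  r%d <- r%d\n", dst, src); }
static void emit_xchg(Reg a, Reg b)    { printf("xchg r%d, r%d\n", a, b); }

/*
 * Move src1->dst1 and src2->dst2 without losing either source,
 * following the same ordering decisions as tcg_out_movext2().
 */
static void mov2(Reg dst1, Reg src1, Reg dst2, Reg src2,
                 bool have_xchg, Reg scratch)
{
    if (dst1 != src2) {
        emit_mov(dst1, src1);      /* writing dst1 cannot clobber src2 */
        emit_mov(dst2, src2);
        return;
    }
    if (dst2 == src1) {            /* the two moves form a full swap */
        if (have_xchg) {
            emit_xchg(src1, src2); /* data in place; TCG then extends in place */
            return;
        }
        assert(scratch >= 0);
        emit_mov(scratch, src1);   /* save src1 before dst2 overwrites it */
        src1 = scratch;
    }
    emit_mov(dst2, src2);          /* dst1 aliases src2: move it out first */
    emit_mov(dst1, src1);
}

int main(void)
{
    mov2(0, 2, 1, 3, false, -1);   /* disjoint: natural order */
    mov2(1, 0, 2, 1, false, -1);   /* dst1 aliases src2: reversed order */
    mov2(1, 0, 0, 1, true,  -1);   /* full swap, backend has xchg */
    mov2(1, 0, 0, 1, false,  3);   /* full swap, no xchg: via scratch reg */
    return 0;
}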