1 | Pretty small still, but there are two patches that ought | 1 | v2: tcg/loongarch64 patch set without last minute tweaks. |
---|---|---|---|
2 | to get backported to stable, so no point in delaying. | ||
3 | 2 | ||
4 | r~ | 3 | r~ |
5 | 4 | ||
6 | The following changes since commit a5ba0a7e4e150d1350a041f0d0ef9ca6c8d7c307: | 5 | The following changes since commit 005ad32358f12fe9313a4a01918a55e60d4f39e5: |
7 | 6 | ||
8 | Merge tag 'pull-aspeed-20241211' of https://github.com/legoater/qemu into staging (2024-12-11 15:16:47 +0000) | 7 | Merge tag 'pull-tpm-2023-09-12-3' of https://github.com/stefanberger/qemu-tpm into staging (2023-09-13 13:41:57 -0400) |
9 | 8 | ||
10 | are available in the Git repository at: | 9 | are available in the Git repository at: |
11 | 10 | ||
12 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20241212 | 11 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230915-2 |
13 | 12 | ||
14 | for you to fetch changes up to 7ac87b14a92234b6a89b701b4043ad6cf8bdcccf: | 13 | for you to fetch changes up to a97a83753c90d79ed15a716610af23fabd84aaed: |
15 | 14 | ||
16 | target/sparc: Use memcpy() and remove memcpy32() (2024-12-12 14:28:38 -0600) | 15 | tcg: Map code_gen_buffer with PROT_BTI (2023-09-16 14:57:16 +0000) |
17 | 16 | ||
18 | ---------------------------------------------------------------- | 17 | ---------------------------------------------------------------- |
19 | tcg: Reset free_temps before tcg_optimize | 18 | *: Delete checks for old host definitions |
20 | tcg/riscv: Fix StoreStore barrier generation | 19 | tcg/loongarch64: Generate LSX instructions |
21 | include/exec: Introduce fpst alias in helper-head.h.inc | 20 | fpu: Add conversions between bfloat16 and [u]int8 |
22 | target/sparc: Use memcpy() and remove memcpy32() | 21 | fpu: Handle m68k extended precision denormals properly |
22 | accel/tcg: Improve cputlb i/o organization | ||
23 | accel/tcg: Simplify tlb_plugin_lookup | ||
24 | accel/tcg: Remove false-negative halted assertion | ||
25 | tcg: Add gvec compare with immediate and scalar operand | ||
26 | tcg/aarch64: Emit BTI insns at jump landing pads | ||
23 | 27 | ||
24 | ---------------------------------------------------------------- | 28 | ---------------------------------------------------------------- |
25 | Philippe Mathieu-Daudé (1): | 29 | Akihiko Odaki (3): |
26 | target/sparc: Use memcpy() and remove memcpy32() | 30 | util: Delete checks for old host definitions |
31 | softmmu: Delete checks for old host definitions | ||
32 | thunk: Delete checks for old host definitions | ||
27 | 33 | ||
28 | Richard Henderson (2): | 34 | Jiajie Chen (16): |
29 | tcg: Reset free_temps before tcg_optimize | 35 | tcg/loongarch64: Import LSX instructions |
30 | include/exec: Introduce fpst alias in helper-head.h.inc | 36 | tcg/loongarch64: Lower basic tcg vec ops to LSX |
37 | tcg: pass vece to tcg_target_const_match() | ||
38 | tcg/loongarch64: Lower cmp_vec to vseq/vsle/vslt | ||
39 | tcg/loongarch64: Lower add/sub_vec to vadd/vsub | ||
40 | tcg/loongarch64: Lower vector bitwise operations | ||
41 | tcg/loongarch64: Lower neg_vec to vneg | ||
42 | tcg/loongarch64: Lower mul_vec to vmul | ||
43 | tcg/loongarch64: Lower vector min max ops | ||
44 | tcg/loongarch64: Lower vector saturated ops | ||
45 | tcg/loongarch64: Lower vector shift vector ops | ||
46 | tcg/loongarch64: Lower bitsel_vec to vbitsel | ||
47 | tcg/loongarch64: Lower vector shift integer ops | ||
48 | tcg/loongarch64: Lower rotv_vec ops to LSX | ||
49 | tcg/loongarch64: Lower rotli_vec to vrotri | ||
50 | tcg/loongarch64: Implement 128-bit load & store | ||
31 | 51 | ||
32 | Roman Artemev (1): | 52 | LIU Zhiwei (2): |
33 | tcg/riscv: Fix StoreStore barrier generation | 53 | accel/tcg: Fix the comment for CPUTLBEntryFull |
54 | fpu: Add conversions between bfloat16 and [u]int8 | ||
34 | 55 | ||
35 | include/tcg/tcg-temp-internal.h | 6 ++++++ | 56 | Nicholas Piggin (1): |
36 | accel/tcg/plugin-gen.c | 2 +- | 57 | accel/tcg: mttcg remove false-negative halted assertion |
37 | target/sparc/win_helper.c | 26 ++++++++------------------ | ||
38 | tcg/tcg.c | 5 ++++- | ||
39 | include/exec/helper-head.h.inc | 3 +++ | ||
40 | tcg/riscv/tcg-target.c.inc | 2 +- | ||
41 | 6 files changed, 23 insertions(+), 21 deletions(-) | ||
42 | 58 | ||
59 | Richard Henderson (17): | ||
60 | tcg: Add gvec compare with immediate and scalar operand | ||
61 | target/arm: Use tcg_gen_gvec_cmpi for compare vs 0 | ||
62 | accel/tcg: Simplify tlb_plugin_lookup | ||
63 | accel/tcg: Split out io_prepare and io_failed | ||
64 | accel/tcg: Use CPUTLBEntryFull.phys_addr in io_failed | ||
65 | plugin: Simplify struct qemu_plugin_hwaddr | ||
66 | accel/tcg: Merge cpu_transaction_failed into io_failed | ||
67 | accel/tcg: Replace direct use of io_readx/io_writex in do_{ld,st}_1 | ||
68 | accel/tcg: Merge io_readx into do_ld_mmio_beN | ||
69 | accel/tcg: Merge io_writex into do_st_mmio_leN | ||
70 | accel/tcg: Introduce do_ld16_mmio_beN | ||
71 | accel/tcg: Introduce do_st16_mmio_leN | ||
72 | fpu: Handle m68k extended precision denormals properly | ||
73 | tcg: Add tcg_out_tb_start backend hook | ||
74 | util/cpuinfo-aarch64: Add CPUINFO_BTI | ||
75 | tcg/aarch64: Emit BTI insns at jump landing pads | ||
76 | tcg: Map code_gen_buffer with PROT_BTI | ||
77 | |||
78 | accel/tcg/tcg-runtime.h | 25 + | ||
79 | host/include/aarch64/host/cpuinfo.h | 1 + | ||
80 | include/exec/cpu-defs.h | 12 +- | ||
81 | include/exec/user/thunk.h | 3 +- | ||
82 | include/fpu/softfloat.h | 12 + | ||
83 | include/hw/core/cpu.h | 13 - | ||
84 | include/qemu/plugin-memory.h | 11 +- | ||
85 | include/qemu/typedefs.h | 1 - | ||
86 | include/tcg/tcg-op-gvec-common.h | 6 + | ||
87 | tcg/loongarch64/tcg-target-con-set.h | 9 + | ||
88 | tcg/loongarch64/tcg-target-con-str.h | 3 + | ||
89 | tcg/loongarch64/tcg-target.h | 40 +- | ||
90 | tcg/loongarch64/tcg-target.opc.h | 12 + | ||
91 | accel/tcg/cputlb.c | 437 ++- | ||
92 | accel/tcg/tcg-accel-ops-mttcg.c | 9 +- | ||
93 | accel/tcg/tcg-runtime-gvec.c | 26 + | ||
94 | fpu/softfloat.c | 67 +- | ||
95 | plugins/api.c | 27 +- | ||
96 | softmmu/async-teardown.c | 3 - | ||
97 | target/arm/tcg/translate.c | 56 +- | ||
98 | tcg/region.c | 41 +- | ||
99 | tcg/tcg-op-gvec.c | 149 + | ||
100 | tcg/tcg.c | 7 +- | ||
101 | tests/tcg/m68k/denormal.c | 53 + | ||
102 | util/cpuinfo-aarch64.c | 7 + | ||
103 | util/oslib-posix.c | 15 +- | ||
104 | fpu/softfloat-parts.c.inc | 7 +- | ||
105 | tcg/aarch64/tcg-target.c.inc | 59 +- | ||
106 | tcg/arm/tcg-target.c.inc | 7 +- | ||
107 | tcg/i386/tcg-target.c.inc | 7 +- | ||
108 | tcg/loongarch64/tcg-insn-defs.c.inc | 6019 +++++++++++++++++++++++++++++++++- | ||
109 | tcg/loongarch64/tcg-target.c.inc | 624 +++- | ||
110 | tcg/mips/tcg-target.c.inc | 7 +- | ||
111 | tcg/ppc/tcg-target.c.inc | 7 +- | ||
112 | tcg/riscv/tcg-target.c.inc | 7 +- | ||
113 | tcg/s390x/tcg-target.c.inc | 7 +- | ||
114 | tcg/sparc64/tcg-target.c.inc | 7 +- | ||
115 | tcg/tci/tcg-target.c.inc | 7 +- | ||
116 | tests/tcg/m68k/Makefile.target | 2 +- | ||
117 | 39 files changed, 7419 insertions(+), 393 deletions(-) | ||
118 | create mode 100644 tcg/loongarch64/tcg-target.opc.h | ||
119 | create mode 100644 tests/tcg/m68k/denormal.c | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | When allocating new temps during tcg_optimize, do not re-use | ||
2 | any EBB temps that were used within the TB. We do not have | ||
3 | any idea over which span of the TB the temp was live. | ||
4 | 1 | ||
5 | Introduce tcg_temp_ebb_reset_freed and use before tcg_optimize, | ||
6 | as well as replacing the equivalent in plugin_gen_inject and | ||
7 | tcg_func_start. | ||
8 | |||
9 | Cc: qemu-stable@nongnu.org | ||
10 | Fixes: fb04ab7ddd8 ("tcg/optimize: Lower TCG_COND_TST{EQ,NE} if unsupported") | ||
11 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2711 | ||
12 | Reported-by: wannacu <wannacu2049@gmail.com> | ||
13 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | ||
15 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
16 | --- | ||
17 | include/tcg/tcg-temp-internal.h | 6 ++++++ | ||
18 | accel/tcg/plugin-gen.c | 2 +- | ||
19 | tcg/tcg.c | 5 ++++- | ||
20 | 3 files changed, 11 insertions(+), 2 deletions(-) | ||
21 | |||
22 | diff --git a/include/tcg/tcg-temp-internal.h b/include/tcg/tcg-temp-internal.h | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/include/tcg/tcg-temp-internal.h | ||
25 | +++ b/include/tcg/tcg-temp-internal.h | ||
26 | @@ -XXX,XX +XXX,XX @@ TCGv_i64 tcg_temp_ebb_new_i64(void); | ||
27 | TCGv_ptr tcg_temp_ebb_new_ptr(void); | ||
28 | TCGv_i128 tcg_temp_ebb_new_i128(void); | ||
29 | |||
30 | +/* Forget all freed EBB temps, so that new allocations produce new temps. */ | ||
31 | +static inline void tcg_temp_ebb_reset_freed(TCGContext *s) | ||
32 | +{ | ||
33 | + memset(s->free_temps, 0, sizeof(s->free_temps)); | ||
34 | +} | ||
35 | + | ||
36 | #endif /* TCG_TEMP_FREE_H */ | ||
37 | diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/accel/tcg/plugin-gen.c | ||
40 | +++ b/accel/tcg/plugin-gen.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static void plugin_gen_inject(struct qemu_plugin_tb *plugin_tb) | ||
42 | * that might be live within the existing opcode stream. | ||
43 | * The simplest solution is to release them all and create new. | ||
44 | */ | ||
45 | - memset(tcg_ctx->free_temps, 0, sizeof(tcg_ctx->free_temps)); | ||
46 | + tcg_temp_ebb_reset_freed(tcg_ctx); | ||
47 | |||
48 | QTAILQ_FOREACH_SAFE(op, &tcg_ctx->ops, link, next) { | ||
49 | switch (op->opc) { | ||
50 | diff --git a/tcg/tcg.c b/tcg/tcg.c | ||
51 | index XXXXXXX..XXXXXXX 100644 | ||
52 | --- a/tcg/tcg.c | ||
53 | +++ b/tcg/tcg.c | ||
54 | @@ -XXX,XX +XXX,XX @@ void tcg_func_start(TCGContext *s) | ||
55 | s->nb_temps = s->nb_globals; | ||
56 | |||
57 | /* No temps have been previously allocated for size or locality. */ | ||
58 | - memset(s->free_temps, 0, sizeof(s->free_temps)); | ||
59 | + tcg_temp_ebb_reset_freed(s); | ||
60 | |||
61 | /* No constant temps have been previously allocated. */ | ||
62 | for (int i = 0; i < TCG_TYPE_COUNT; ++i) { | ||
63 | @@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) | ||
64 | } | ||
65 | #endif | ||
66 | |||
67 | + /* Do not reuse any EBB that may be allocated within the TB. */ | ||
68 | + tcg_temp_ebb_reset_freed(s); | ||
69 | + | ||
70 | tcg_optimize(s); | ||
71 | |||
72 | reachable_code_pass(s); | ||
73 | -- | ||
74 | 2.43.0 | ||
75 | |||
76 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Roman Artemev <roman.artemev@syntacore.com> | ||
2 | 1 | ||
3 | On RISC-V, a StoreStore barrier corresponds to | ||
4 | `fence w, w`, not `fence r, r`. | ||
5 | |||
6 | Cc: qemu-stable@nongnu.org | ||
7 | Fixes: efbea94c76b ("tcg/riscv: Add slowpath load and store instructions") | ||
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | ||
9 | Signed-off-by: Denis Tomashev <denis.tomashev@syntacore.com> | ||
10 | Signed-off-by: Roman Artemev <roman.artemev@syntacore.com> | ||
11 | Message-ID: <e2f2131e294a49e79959d4fa9ec02cf4@syntacore.com> | ||
12 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
13 | --- | ||
14 | tcg/riscv/tcg-target.c.inc | 2 +- | ||
15 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
16 | |||
17 | diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/tcg/riscv/tcg-target.c.inc | ||
20 | +++ b/tcg/riscv/tcg-target.c.inc | ||
21 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) | ||
22 | insn |= 0x02100000; | ||
23 | } | ||
24 | if (a0 & TCG_MO_ST_ST) { | ||
25 | - insn |= 0x02200000; | ||
26 | + insn |= 0x01100000; | ||
27 | } | ||
28 | tcg_out32(s, insn); | ||
29 | } | ||
30 | -- | ||
31 | 2.43.0 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | This allows targets to declare that the helper requires a | ||
2 | float_status pointer instead of a generic void pointer. | ||
3 | 1 | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
6 | --- | ||
7 | include/exec/helper-head.h.inc | 3 +++ | ||
8 | 1 file changed, 3 insertions(+) | ||
9 | |||
10 | diff --git a/include/exec/helper-head.h.inc b/include/exec/helper-head.h.inc | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/include/exec/helper-head.h.inc | ||
13 | +++ b/include/exec/helper-head.h.inc | ||
14 | @@ -XXX,XX +XXX,XX @@ | ||
15 | #define dh_alias_ptr ptr | ||
16 | #define dh_alias_cptr ptr | ||
17 | #define dh_alias_env ptr | ||
18 | +#define dh_alias_fpst ptr | ||
19 | #define dh_alias_void void | ||
20 | #define dh_alias_noreturn noreturn | ||
21 | #define dh_alias(t) glue(dh_alias_, t) | ||
22 | @@ -XXX,XX +XXX,XX @@ | ||
23 | #define dh_ctype_ptr void * | ||
24 | #define dh_ctype_cptr const void * | ||
25 | #define dh_ctype_env CPUArchState * | ||
26 | +#define dh_ctype_fpst float_status * | ||
27 | #define dh_ctype_void void | ||
28 | #define dh_ctype_noreturn G_NORETURN void | ||
29 | #define dh_ctype(t) dh_ctype_##t | ||
30 | @@ -XXX,XX +XXX,XX @@ | ||
31 | #define dh_typecode_f64 dh_typecode_i64 | ||
32 | #define dh_typecode_cptr dh_typecode_ptr | ||
33 | #define dh_typecode_env dh_typecode_ptr | ||
34 | +#define dh_typecode_fpst dh_typecode_ptr | ||
35 | #define dh_typecode(t) dh_typecode_##t | ||
36 | |||
37 | #define dh_callflag_i32 0 | ||
38 | -- | ||
39 | 2.43.0 | ||
40 | |||
41 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> | 1 | From: Jiajie Chen <c@jia.je> |
---|---|---|---|
2 | 2 | ||
3 | Rather than manually copying each register, use | 3 | If LSX is available, use LSX instructions to implement 128-bit load & |
4 | the libc memcpy(), which is well optimized nowadays. | 4 | store when MO_128 is required, otherwise use two 64-bit loads & stores. |
5 | 5 | ||
6 | Suggested-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | 6 | Signed-off-by: Jiajie Chen <c@jia.je> |
7 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | 7 | Message-Id: <20230908022302.180442-17-c@jia.je> |
8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> | 8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
9 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
10 | Message-ID: <20241205205418.67613-1-philmd@linaro.org> | ||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 9 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
12 | --- | 10 | --- |
13 | target/sparc/win_helper.c | 26 ++++++++------------------ | 11 | tcg/loongarch64/tcg-target-con-set.h | 2 + |
14 | 1 file changed, 8 insertions(+), 18 deletions(-) | 12 | tcg/loongarch64/tcg-target.h | 2 +- |
13 | tcg/loongarch64/tcg-target.c.inc | 59 ++++++++++++++++++++++++++++ | ||
14 | 3 files changed, 62 insertions(+), 1 deletion(-) | ||
15 | 15 | ||
16 | diff --git a/target/sparc/win_helper.c b/target/sparc/win_helper.c | 16 | diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h |
17 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/target/sparc/win_helper.c | 18 | --- a/tcg/loongarch64/tcg-target-con-set.h |
19 | +++ b/target/sparc/win_helper.c | 19 | +++ b/tcg/loongarch64/tcg-target-con-set.h |
20 | @@ -XXX,XX +XXX,XX @@ | 20 | @@ -XXX,XX +XXX,XX @@ C_O0_I1(r) |
21 | #include "exec/helper-proto.h" | 21 | C_O0_I2(rZ, r) |
22 | #include "trace.h" | 22 | C_O0_I2(rZ, rZ) |
23 | 23 | C_O0_I2(w, r) | |
24 | -static inline void memcpy32(target_ulong *dst, const target_ulong *src) | 24 | +C_O0_I3(r, r, r) |
25 | -{ | 25 | C_O1_I1(r, r) |
26 | - dst[0] = src[0]; | 26 | C_O1_I1(w, r) |
27 | - dst[1] = src[1]; | 27 | C_O1_I1(w, w) |
28 | - dst[2] = src[2]; | 28 | @@ -XXX,XX +XXX,XX @@ C_O1_I2(w, w, wM) |
29 | - dst[3] = src[3]; | 29 | C_O1_I2(w, w, wA) |
30 | - dst[4] = src[4]; | 30 | C_O1_I3(w, w, w, w) |
31 | - dst[5] = src[5]; | 31 | C_O1_I4(r, rZ, rJ, rZ, rZ) |
32 | - dst[6] = src[6]; | 32 | +C_O2_I1(r, r, r) |
33 | - dst[7] = src[7]; | 33 | diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h |
34 | -} | 34 | index XXXXXXX..XXXXXXX 100644 |
35 | - | 35 | --- a/tcg/loongarch64/tcg-target.h |
36 | void cpu_set_cwp(CPUSPARCState *env, int new_cwp) | 36 | +++ b/tcg/loongarch64/tcg-target.h |
37 | { | 37 | @@ -XXX,XX +XXX,XX @@ extern bool use_lsx_instructions; |
38 | /* put the modified wrap registers at their proper location */ | 38 | #define TCG_TARGET_HAS_muluh_i64 1 |
39 | if (env->cwp == env->nwindows - 1) { | 39 | #define TCG_TARGET_HAS_mulsh_i64 1 |
40 | - memcpy32(env->regbase, env->regbase + env->nwindows * 16); | 40 | |
41 | + memcpy(env->regbase, env->regbase + env->nwindows * 16, | 41 | -#define TCG_TARGET_HAS_qemu_ldst_i128 0 |
42 | + sizeof(env->gregs)); | 42 | +#define TCG_TARGET_HAS_qemu_ldst_i128 use_lsx_instructions |
43 | } | 43 | |
44 | env->cwp = new_cwp; | 44 | #define TCG_TARGET_HAS_v64 0 |
45 | 45 | #define TCG_TARGET_HAS_v128 use_lsx_instructions | |
46 | /* put the wrap registers at their temporary location */ | 46 | diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc |
47 | if (new_cwp == env->nwindows - 1) { | 47 | index XXXXXXX..XXXXXXX 100644 |
48 | - memcpy32(env->regbase + env->nwindows * 16, env->regbase); | 48 | --- a/tcg/loongarch64/tcg-target.c.inc |
49 | + memcpy(env->regbase + env->nwindows * 16, env->regbase, | 49 | +++ b/tcg/loongarch64/tcg-target.c.inc |
50 | + sizeof(env->gregs)); | 50 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, |
51 | } | ||
52 | env->regwptr = env->regbase + (new_cwp * 16); | ||
53 | } | ||
54 | @@ -XXX,XX +XXX,XX @@ void cpu_gl_switch_gregs(CPUSPARCState *env, uint32_t new_gl) | ||
55 | dst = get_gl_gregset(env, env->gl); | ||
56 | |||
57 | if (src != dst) { | ||
58 | - memcpy32(dst, env->gregs); | ||
59 | - memcpy32(env->gregs, src); | ||
60 | + memcpy(dst, env->gregs, sizeof(env->gregs)); | ||
61 | + memcpy(env->gregs, src, sizeof(env->gregs)); | ||
62 | } | 51 | } |
63 | } | 52 | } |
64 | 53 | ||
65 | @@ -XXX,XX +XXX,XX @@ void cpu_change_pstate(CPUSPARCState *env, uint32_t new_pstate) | 54 | +static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg data_lo, TCGReg data_hi, |
66 | /* Switch global register bank */ | 55 | + TCGReg addr_reg, MemOpIdx oi, bool is_ld) |
67 | src = get_gregset(env, new_pstate_regs); | 56 | +{ |
68 | dst = get_gregset(env, pstate_regs); | 57 | + TCGLabelQemuLdst *ldst; |
69 | - memcpy32(dst, env->gregs); | 58 | + HostAddress h; |
70 | - memcpy32(env->gregs, src); | 59 | + |
71 | + memcpy(dst, env->gregs, sizeof(env->gregs)); | 60 | + ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld); |
72 | + memcpy(env->gregs, src, sizeof(env->gregs)); | 61 | + |
73 | } else { | 62 | + if (h.aa.atom == MO_128) { |
74 | trace_win_helper_no_switch_pstate(new_pstate_regs); | 63 | + /* |
75 | } | 64 | + * Use VLDX/VSTX when 128-bit atomicity is required. |
65 | + * If address is aligned to 16-bytes, the 128-bit load/store is atomic. | ||
66 | + */ | ||
67 | + if (is_ld) { | ||
68 | + tcg_out_opc_vldx(s, TCG_VEC_TMP0, h.base, h.index); | ||
69 | + tcg_out_opc_vpickve2gr_d(s, data_lo, TCG_VEC_TMP0, 0); | ||
70 | + tcg_out_opc_vpickve2gr_d(s, data_hi, TCG_VEC_TMP0, 1); | ||
71 | + } else { | ||
72 | + tcg_out_opc_vinsgr2vr_d(s, TCG_VEC_TMP0, data_lo, 0); | ||
73 | + tcg_out_opc_vinsgr2vr_d(s, TCG_VEC_TMP0, data_hi, 1); | ||
74 | + tcg_out_opc_vstx(s, TCG_VEC_TMP0, h.base, h.index); | ||
75 | + } | ||
76 | + } else { | ||
77 | + /* Otherwise use a pair of LD/ST. */ | ||
78 | + tcg_out_opc_add_d(s, TCG_REG_TMP0, h.base, h.index); | ||
79 | + if (is_ld) { | ||
80 | + tcg_out_opc_ld_d(s, data_lo, TCG_REG_TMP0, 0); | ||
81 | + tcg_out_opc_ld_d(s, data_hi, TCG_REG_TMP0, 8); | ||
82 | + } else { | ||
83 | + tcg_out_opc_st_d(s, data_lo, TCG_REG_TMP0, 0); | ||
84 | + tcg_out_opc_st_d(s, data_hi, TCG_REG_TMP0, 8); | ||
85 | + } | ||
86 | + } | ||
87 | + | ||
88 | + if (ldst) { | ||
89 | + ldst->type = TCG_TYPE_I128; | ||
90 | + ldst->datalo_reg = data_lo; | ||
91 | + ldst->datahi_reg = data_hi; | ||
92 | + ldst->raddr = tcg_splitwx_to_rx(s->code_ptr); | ||
93 | + } | ||
94 | +} | ||
95 | + | ||
96 | /* | ||
97 | * Entry-points | ||
98 | */ | ||
99 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
100 | TCGArg a0 = args[0]; | ||
101 | TCGArg a1 = args[1]; | ||
102 | TCGArg a2 = args[2]; | ||
103 | + TCGArg a3 = args[3]; | ||
104 | int c2 = const_args[2]; | ||
105 | |||
106 | switch (opc) { | ||
107 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
108 | case INDEX_op_qemu_ld_a64_i64: | ||
109 | tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64); | ||
110 | break; | ||
111 | + case INDEX_op_qemu_ld_a32_i128: | ||
112 | + case INDEX_op_qemu_ld_a64_i128: | ||
113 | + tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, true); | ||
114 | + break; | ||
115 | case INDEX_op_qemu_st_a32_i32: | ||
116 | case INDEX_op_qemu_st_a64_i32: | ||
117 | tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32); | ||
118 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, | ||
119 | case INDEX_op_qemu_st_a64_i64: | ||
120 | tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64); | ||
121 | break; | ||
122 | + case INDEX_op_qemu_st_a32_i128: | ||
123 | + case INDEX_op_qemu_st_a64_i128: | ||
124 | + tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, false); | ||
125 | + break; | ||
126 | |||
127 | case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ | ||
128 | case INDEX_op_mov_i64: | ||
129 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | ||
130 | case INDEX_op_qemu_st_a64_i64: | ||
131 | return C_O0_I2(rZ, r); | ||
132 | |||
133 | + case INDEX_op_qemu_ld_a32_i128: | ||
134 | + case INDEX_op_qemu_ld_a64_i128: | ||
135 | + return C_O2_I1(r, r, r); | ||
136 | + | ||
137 | + case INDEX_op_qemu_st_a32_i128: | ||
138 | + case INDEX_op_qemu_st_a64_i128: | ||
139 | + return C_O0_I3(r, r, r); | ||
140 | + | ||
141 | case INDEX_op_brcond_i32: | ||
142 | case INDEX_op_brcond_i64: | ||
143 | return C_O0_I2(rZ, rZ); | ||
76 | -- | 144 | -- |
77 | 2.43.0 | 145 | 2.34.1 |
78 | |||
79 | diff view generated by jsdifflib |