1 | The following changes since commit 34eac35f893664eb8545b98142e23d9954722766: | 1 | Pretty small still, but there are two patches that ought |
---|---|---|---|
2 | to get backported to stable, so no point in delaying. | ||
2 | 3 | ||
3 | Merge tag 'pull-riscv-to-apply-20240110' of https://github.com/alistair23/qemu into staging (2024-01-10 11:41:56 +0000) | 4 | r~ |
5 | |||
6 | The following changes since commit a5ba0a7e4e150d1350a041f0d0ef9ca6c8d7c307: | ||
7 | |||
8 | Merge tag 'pull-aspeed-20241211' of https://github.com/legoater/qemu into staging (2024-12-11 15:16:47 +0000) | ||
4 | 9 | ||
5 | are available in the Git repository at: | 10 | are available in the Git repository at: |
6 | 11 | ||
7 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20240111 | 12 | https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20241212 |
8 | 13 | ||
9 | for you to fetch changes up to 1d513e06d96697f44de4a1b85c6ff627c443e306: | 14 | for you to fetch changes up to 7ac87b14a92234b6a89b701b4043ad6cf8bdcccf: |
10 | 15 | ||
11 | util: fix build with musl libc on ppc64le (2024-01-11 08:48:16 +1100) | 16 | target/sparc: Use memcpy() and remove memcpy32() (2024-12-12 14:28:38 -0600) |
12 | 17 | ||
13 | ---------------------------------------------------------------- | 18 | ---------------------------------------------------------------- |
14 | tcg/i386: Use more 8-bit immediate forms for add, sub, or, xor | 19 | tcg: Reset free_temps before tcg_optimize |
15 | tcg/ppc: Use new registers for LQ destination | 20 | tcg/riscv: Fix StoreStore barrier generation |
16 | util: fix build with musl libc on ppc64le | 21 | include/exec: Introduce fpst alias in helper-head.h.inc |
22 | target/sparc: Use memcpy() and remove memcpy32() | ||
17 | 23 | ||
18 | ---------------------------------------------------------------- | 24 | ---------------------------------------------------------------- |
19 | Natanael Copa (1): | 25 | Philippe Mathieu-Daudé (1): |
20 | util: fix build with musl libc on ppc64le | 26 | target/sparc: Use memcpy() and remove memcpy32() |
21 | 27 | ||
22 | Paolo Bonzini (2): | 28 | Richard Henderson (2): |
23 | tcg/i386: convert add/sub of 128 to sub/add of -128 | 29 | tcg: Reset free_temps before tcg_optimize |
24 | tcg/i386: use 8-bit OR or XOR for unsigned 8-bit immediates | 30 | include/exec: Introduce fpst alias in helper-head.h.inc |
25 | 31 | ||
26 | Richard Henderson (1): | 32 | Roman Artemev (1): |
27 | tcg/ppc: Use new registers for LQ destination | 33 | tcg/riscv: Fix StoreStore barrier generation |
28 | 34 | ||
29 | tcg/ppc/tcg-target-con-set.h | 2 +- | 35 | include/tcg/tcg-temp-internal.h | 6 ++++++ |
30 | tcg/tcg.c | 21 ++++++++++++---- | 36 | accel/tcg/plugin-gen.c | 2 +- |
31 | util/cpuinfo-ppc.c | 6 ++--- | 37 | target/sparc/win_helper.c | 26 ++++++++------------------ |
32 | tcg/i386/tcg-target.c.inc | 60 +++++++++++++++++++++++++++++++++----------- | 38 | tcg/tcg.c | 5 ++++- |
33 | tcg/ppc/tcg-target.c.inc | 3 ++- | 39 | include/exec/helper-head.h.inc | 3 +++ |
34 | 5 files changed, 67 insertions(+), 25 deletions(-) | 40 | tcg/riscv/tcg-target.c.inc | 2 +- |
41 | 6 files changed, 23 insertions(+), 21 deletions(-) | ||
42 | diff view generated by jsdifflib |
1 | LQ has a constraint that RTp != RA, else SIGILL. | 1 | When allocating new temps during tcg_optimize, do not re-use |
---|---|---|---|
2 | Therefore, force the destination of INDEX_op_qemu_*_ld128 to be a | 2 | any EBB temps that were used within the TB. We do not have |
3 | new register pair, so that it cannot overlap the input address. | 3 | any idea of the span of the TB in which the temp was live. |
4 | 4 | ||
5 | This requires new support in process_op_defs and tcg_reg_alloc_op. | 5 | Introduce tcg_temp_ebb_reset_freed and use before tcg_optimize, |
6 | as well as replacing the equivalent in plugin_gen_inject and | ||
7 | tcg_func_start. | ||
6 | 8 | ||
7 | Cc: qemu-stable@nongnu.org | 9 | Cc: qemu-stable@nongnu.org |
8 | Fixes: 526cd4ec01f ("tcg/ppc: Support 128-bit load/store") | 10 | Fixes: fb04ab7ddd8 ("tcg/optimize: Lower TCG_COND_TST{EQ,NE} if unsupported") |
11 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2711 | ||
12 | Reported-by: wannacu <wannacu2049@gmail.com> | ||
13 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
14 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> | ||
9 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | 15 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
10 | Message-Id: <20240102013456.131846-1-richard.henderson@linaro.org> | ||
11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | ||
12 | --- | 16 | --- |
13 | tcg/ppc/tcg-target-con-set.h | 2 +- | 17 | include/tcg/tcg-temp-internal.h | 6 ++++++ |
14 | tcg/tcg.c | 21 ++++++++++++++++----- | 18 | accel/tcg/plugin-gen.c | 2 +- |
15 | tcg/ppc/tcg-target.c.inc | 3 ++- | 19 | tcg/tcg.c | 5 ++++- |
16 | 3 files changed, 19 insertions(+), 7 deletions(-) | 20 | 3 files changed, 11 insertions(+), 2 deletions(-) |
17 | 21 | ||
18 | diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h | 22 | diff --git a/include/tcg/tcg-temp-internal.h b/include/tcg/tcg-temp-internal.h |
19 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/tcg/ppc/tcg-target-con-set.h | 24 | --- a/include/tcg/tcg-temp-internal.h |
21 | +++ b/tcg/ppc/tcg-target-con-set.h | 25 | +++ b/include/tcg/tcg-temp-internal.h |
22 | @@ -XXX,XX +XXX,XX @@ C_O1_I3(v, v, v, v) | 26 | @@ -XXX,XX +XXX,XX @@ TCGv_i64 tcg_temp_ebb_new_i64(void); |
23 | C_O1_I4(r, r, ri, rZ, rZ) | 27 | TCGv_ptr tcg_temp_ebb_new_ptr(void); |
24 | C_O1_I4(r, r, r, ri, ri) | 28 | TCGv_i128 tcg_temp_ebb_new_i128(void); |
25 | C_O2_I1(r, r, r) | 29 | |
26 | -C_O2_I1(o, m, r) | 30 | +/* Forget all freed EBB temps, so that new allocations produce new temps. */ |
27 | +C_N1O1_I1(o, m, r) | 31 | +static inline void tcg_temp_ebb_reset_freed(TCGContext *s) |
28 | C_O2_I2(r, r, r, r) | 32 | +{ |
29 | C_O2_I4(r, r, rI, rZM, r, r) | 33 | + memset(s->free_temps, 0, sizeof(s->free_temps)); |
30 | C_O2_I4(r, r, r, r, rI, rZM) | 34 | +} |
35 | + | ||
36 | #endif /* TCG_TEMP_FREE_H */ | ||
37 | diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/accel/tcg/plugin-gen.c | ||
40 | +++ b/accel/tcg/plugin-gen.c | ||
41 | @@ -XXX,XX +XXX,XX @@ static void plugin_gen_inject(struct qemu_plugin_tb *plugin_tb) | ||
42 | * that might be live within the existing opcode stream. | ||
43 | * The simplest solution is to release them all and create new. | ||
44 | */ | ||
45 | - memset(tcg_ctx->free_temps, 0, sizeof(tcg_ctx->free_temps)); | ||
46 | + tcg_temp_ebb_reset_freed(tcg_ctx); | ||
47 | |||
48 | QTAILQ_FOREACH_SAFE(op, &tcg_ctx->ops, link, next) { | ||
49 | switch (op->opc) { | ||
31 | diff --git a/tcg/tcg.c b/tcg/tcg.c | 50 | diff --git a/tcg/tcg.c b/tcg/tcg.c |
32 | index XXXXXXX..XXXXXXX 100644 | 51 | index XXXXXXX..XXXXXXX 100644 |
33 | --- a/tcg/tcg.c | 52 | --- a/tcg/tcg.c |
34 | +++ b/tcg/tcg.c | 53 | +++ b/tcg/tcg.c |
35 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1, | 54 | @@ -XXX,XX +XXX,XX @@ void tcg_func_start(TCGContext *s) |
36 | #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4), | 55 | s->nb_temps = s->nb_globals; |
37 | 56 | ||
38 | #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2), | 57 | /* No temps have been previously allocated for size or locality. */ |
39 | +#define C_N1O1_I1(O1, O2, I1) C_PFX3(c_n1o1_i1_, O1, O2, I1), | 58 | - memset(s->free_temps, 0, sizeof(s->free_temps)); |
40 | #define C_N2_I1(O1, O2, I1) C_PFX3(c_n2_i1_, O1, O2, I1), | 59 | + tcg_temp_ebb_reset_freed(s); |
41 | 60 | ||
42 | #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1), | 61 | /* No constant temps have been previously allocated. */ |
43 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode); | 62 | for (int i = 0; i < TCG_TYPE_COUNT; ++i) { |
44 | #undef C_O1_I3 | 63 | @@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) |
45 | #undef C_O1_I4 | 64 | } |
46 | #undef C_N1_I2 | 65 | #endif |
47 | +#undef C_N1O1_I1 | 66 | |
48 | #undef C_N2_I1 | 67 | + /* Do not reuse any EBB that may be allocated within the TB. */ |
49 | #undef C_O2_I1 | 68 | + tcg_temp_ebb_reset_freed(s); |
50 | #undef C_O2_I2 | 69 | + |
51 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode); | 70 | tcg_optimize(s); |
52 | #define C_O1_I4(O1, I1, I2, I3, I4) { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } }, | 71 | |
53 | 72 | reachable_code_pass(s); | |
54 | #define C_N1_I2(O1, I1, I2) { .args_ct_str = { "&" #O1, #I1, #I2 } }, | ||
55 | +#define C_N1O1_I1(O1, O2, I1) { .args_ct_str = { "&" #O1, #O2, #I1 } }, | ||
56 | #define C_N2_I1(O1, O2, I1) { .args_ct_str = { "&" #O1, "&" #O2, #I1 } }, | ||
57 | |||
58 | #define C_O2_I1(O1, O2, I1) { .args_ct_str = { #O1, #O2, #I1 } }, | ||
59 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef constraint_sets[] = { | ||
60 | #undef C_O1_I3 | ||
61 | #undef C_O1_I4 | ||
62 | #undef C_N1_I2 | ||
63 | +#undef C_N1O1_I1 | ||
64 | #undef C_N2_I1 | ||
65 | #undef C_O2_I1 | ||
66 | #undef C_O2_I2 | ||
67 | @@ -XXX,XX +XXX,XX @@ static const TCGTargetOpDef constraint_sets[] = { | ||
68 | #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4) | ||
69 | |||
70 | #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2) | ||
71 | +#define C_N1O1_I1(O1, O2, I1) C_PFX3(c_n1o1_i1_, O1, O2, I1) | ||
72 | #define C_N2_I1(O1, O2, I1) C_PFX3(c_n2_i1_, O1, O2, I1) | ||
73 | |||
74 | #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1) | ||
75 | @@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s) | ||
76 | .pair = 2, | ||
77 | .pair_index = o, | ||
78 | .regs = def->args_ct[o].regs << 1, | ||
79 | + .newreg = def->args_ct[o].newreg, | ||
80 | }; | ||
81 | def->args_ct[o].pair = 1; | ||
82 | def->args_ct[o].pair_index = i; | ||
83 | @@ -XXX,XX +XXX,XX @@ static void process_op_defs(TCGContext *s) | ||
84 | .pair = 1, | ||
85 | .pair_index = o, | ||
86 | .regs = def->args_ct[o].regs >> 1, | ||
87 | + .newreg = def->args_ct[o].newreg, | ||
88 | }; | ||
89 | def->args_ct[o].pair = 2; | ||
90 | def->args_ct[o].pair_index = i; | ||
91 | @@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) | ||
92 | break; | ||
93 | |||
94 | case 1: /* first of pair */ | ||
95 | - tcg_debug_assert(!arg_ct->newreg); | ||
96 | if (arg_ct->oalias) { | ||
97 | reg = new_args[arg_ct->alias_index]; | ||
98 | - break; | ||
99 | + } else if (arg_ct->newreg) { | ||
100 | + reg = tcg_reg_alloc_pair(s, arg_ct->regs, | ||
101 | + i_allocated_regs | o_allocated_regs, | ||
102 | + output_pref(op, k), | ||
103 | + ts->indirect_base); | ||
104 | + } else { | ||
105 | + reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, | ||
106 | + output_pref(op, k), | ||
107 | + ts->indirect_base); | ||
108 | } | ||
109 | - reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs, | ||
110 | - output_pref(op, k), ts->indirect_base); | ||
111 | break; | ||
112 | |||
113 | case 2: /* second of pair */ | ||
114 | - tcg_debug_assert(!arg_ct->newreg); | ||
115 | if (arg_ct->oalias) { | ||
116 | reg = new_args[arg_ct->alias_index]; | ||
117 | } else { | ||
118 | diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc | ||
119 | index XXXXXXX..XXXXXXX 100644 | ||
120 | --- a/tcg/ppc/tcg-target.c.inc | ||
121 | +++ b/tcg/ppc/tcg-target.c.inc | ||
122 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi, | ||
123 | tcg_debug_assert(!need_bswap); | ||
124 | tcg_debug_assert(datalo & 1); | ||
125 | tcg_debug_assert(datahi == datalo - 1); | ||
126 | + tcg_debug_assert(!is_ld || datahi != index); | ||
127 | insn = is_ld ? LQ : STQ; | ||
128 | tcg_out32(s, insn | TAI(datahi, index, 0)); | ||
129 | } else { | ||
130 | @@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) | ||
131 | |||
132 | case INDEX_op_qemu_ld_a32_i128: | ||
133 | case INDEX_op_qemu_ld_a64_i128: | ||
134 | - return C_O2_I1(o, m, r); | ||
135 | + return C_N1O1_I1(o, m, r); | ||
136 | case INDEX_op_qemu_st_a32_i128: | ||
137 | case INDEX_op_qemu_st_a64_i128: | ||
138 | return C_O0_I3(o, m, r); | ||
139 | -- | 73 | -- |
140 | 2.34.1 | 74 | 2.43.0 |
141 | 75 | ||
142 | 76 | diff view generated by jsdifflib |
1 | From: Natanael Copa <ncopa@alpinelinux.org> | 1 | From: Roman Artemev <roman.artemev@syntacore.com> |
---|---|---|---|
2 | 2 | ||
3 | Use PPC_FEATURE2_ISEL and PPC_FEATURE2_VEC_CRYPTO from linux headers | 3 | On RISC-V, a StoreStore barrier corresponds to |
4 | instead of the GNU specific PPC_FEATURE2_HAS_ISEL and | 4 | `fence w, w`, not `fence r, r`. |
5 | PPC_FEATURE2_HAS_VEC_CRYPTO. This fixes build with musl libc. | ||
6 | 5 | ||
7 | Cc: qemu-stable@nongnu.org | 6 | Cc: qemu-stable@nongnu.org |
8 | Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1861 | 7 | Fixes: efbea94c76b ("tcg/riscv: Add slowpath load and store instructions") |
9 | Signed-off-by: Natanael Copa <ncopa@alpinelinux.org> | 8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
10 | Fixes: 63922f467a ("tcg/ppc: Replace HAVE_ISEL macro with a variable") | 9 | Signed-off-by: Denis Tomashev <denis.tomashev@syntacore.com> |
11 | Fixes: 68f340d4cd ("tcg/ppc: Enable Altivec detection") | 10 | Signed-off-by: Roman Artemev <roman.artemev@syntacore.com> |
12 | Message-Id: <20231219105236.7059-1-ncopa@alpinelinux.org> | 11 | Message-ID: <e2f2131e294a49e79959d4fa9ec02cf4@syntacore.com> |
13 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 12 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
14 | --- | 13 | --- |
15 | util/cpuinfo-ppc.c | 6 +++--- | 14 | tcg/riscv/tcg-target.c.inc | 2 +- |
16 | 1 file changed, 3 insertions(+), 3 deletions(-) | 15 | 1 file changed, 1 insertion(+), 1 deletion(-) |
17 | 16 | ||
18 | diff --git a/util/cpuinfo-ppc.c b/util/cpuinfo-ppc.c | 17 | diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc |
19 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/util/cpuinfo-ppc.c | 19 | --- a/tcg/riscv/tcg-target.c.inc |
21 | +++ b/util/cpuinfo-ppc.c | 20 | +++ b/tcg/riscv/tcg-target.c.inc |
22 | @@ -XXX,XX +XXX,XX @@ | 21 | @@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) |
23 | #include "qemu/osdep.h" | 22 | insn |= 0x02100000; |
24 | #include "host/cpuinfo.h" | ||
25 | |||
26 | +#include <asm/cputable.h> | ||
27 | #ifdef CONFIG_GETAUXVAL | ||
28 | # include <sys/auxv.h> | ||
29 | #else | ||
30 | -# include <asm/cputable.h> | ||
31 | # include "elf.h" | ||
32 | #endif | ||
33 | |||
34 | @@ -XXX,XX +XXX,XX @@ unsigned __attribute__((constructor)) cpuinfo_init(void) | ||
35 | info |= CPUINFO_V2_06; | ||
36 | } | 23 | } |
37 | 24 | if (a0 & TCG_MO_ST_ST) { | |
38 | - if (hwcap2 & PPC_FEATURE2_HAS_ISEL) { | 25 | - insn |= 0x02200000; |
39 | + if (hwcap2 & PPC_FEATURE2_ISEL) { | 26 | + insn |= 0x01100000; |
40 | info |= CPUINFO_ISEL; | ||
41 | } | 27 | } |
42 | if (hwcap & PPC_FEATURE_HAS_ALTIVEC) { | 28 | tcg_out32(s, insn); |
43 | @@ -XXX,XX +XXX,XX @@ unsigned __attribute__((constructor)) cpuinfo_init(void) | 29 | } |
44 | * always have both anyway, since VSX came with Power7 | ||
45 | * and crypto came with Power8. | ||
46 | */ | ||
47 | - if (hwcap2 & PPC_FEATURE2_HAS_VEC_CRYPTO) { | ||
48 | + if (hwcap2 & PPC_FEATURE2_VEC_CRYPTO) { | ||
49 | info |= CPUINFO_CRYPTO; | ||
50 | } | ||
51 | } | ||
52 | -- | 30 | -- |
53 | 2.34.1 | 31 | 2.43.0 | diff view generated by jsdifflib |
1 | From: Paolo Bonzini <pbonzini@redhat.com> | 1 | This allows targets to declare that the helper requires a |
---|---|---|---|
2 | float_status pointer instead of a generic void pointer. | ||
2 | 3 | ||
3 | In the case where OR or XOR has an 8-bit immediate between 128 and 255, | 4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
4 | we can operate on a low-byte register and shorten the output by two or | ||
5 | three bytes (two if a prefix byte is needed for REX.B). | ||
6 | |||
7 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
8 | Message-Id: <20231228120524.70239-1-pbonzini@redhat.com> | ||
9 | [rth: Incorporate into switch.] | ||
10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 5 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
11 | --- | 6 | --- |
12 | tcg/i386/tcg-target.c.inc | 11 +++++++++++ | 7 | include/exec/helper-head.h.inc | 3 +++ |
13 | 1 file changed, 11 insertions(+) | 8 | 1 file changed, 3 insertions(+) |
14 | 9 | ||
15 | diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc | 10 | diff --git a/include/exec/helper-head.h.inc b/include/exec/helper-head.h.inc |
16 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/tcg/i386/tcg-target.c.inc | 12 | --- a/include/exec/helper-head.h.inc |
18 | +++ b/tcg/i386/tcg-target.c.inc | 13 | +++ b/include/exec/helper-head.h.inc |
19 | @@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece) | 14 | @@ -XXX,XX +XXX,XX @@ |
20 | #define P_VEXL 0x80000 /* Set VEX.L = 1 */ | 15 | #define dh_alias_ptr ptr |
21 | #define P_EVEX 0x100000 /* Requires EVEX encoding */ | 16 | #define dh_alias_cptr ptr |
22 | 17 | #define dh_alias_env ptr | |
23 | +#define OPC_ARITH_EbIb (0x80) | 18 | +#define dh_alias_fpst ptr |
24 | #define OPC_ARITH_EvIz (0x81) | 19 | #define dh_alias_void void |
25 | #define OPC_ARITH_EvIb (0x83) | 20 | #define dh_alias_noreturn noreturn |
26 | #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */ | 21 | #define dh_alias(t) glue(dh_alias_, t) |
27 | @@ -XXX,XX +XXX,XX @@ static void tgen_arithi(TCGContext *s, int c, int r0, | 22 | @@ -XXX,XX +XXX,XX @@ |
28 | return; | 23 | #define dh_ctype_ptr void * |
29 | } | 24 | #define dh_ctype_cptr const void * |
30 | break; | 25 | #define dh_ctype_env CPUArchState * |
31 | + | 26 | +#define dh_ctype_fpst float_status * |
32 | + case ARITH_OR: | 27 | #define dh_ctype_void void |
33 | + case ARITH_XOR: | 28 | #define dh_ctype_noreturn G_NORETURN void |
34 | + if (val >= 0x80 && val <= 0xff | 29 | #define dh_ctype(t) dh_ctype_##t |
35 | + && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) { | 30 | @@ -XXX,XX +XXX,XX @@ |
36 | + tcg_out_modrm(s, OPC_ARITH_EbIb + P_REXB_RM, c, r0); | 31 | #define dh_typecode_f64 dh_typecode_i64 |
37 | + tcg_out8(s, val); | 32 | #define dh_typecode_cptr dh_typecode_ptr |
38 | + return; | 33 | #define dh_typecode_env dh_typecode_ptr |
39 | + } | 34 | +#define dh_typecode_fpst dh_typecode_ptr |
40 | + break; | 35 | #define dh_typecode(t) dh_typecode_##t |
41 | } | 36 | |
42 | 37 | #define dh_callflag_i32 0 | |
43 | if (val == (int8_t)val) { | ||
44 | -- | 38 | -- |
45 | 2.34.1 | 39 | 2.43.0 |
40 | |||
41 | diff view generated by jsdifflib |
1 | From: Paolo Bonzini <pbonzini@redhat.com> | 1 | From: Philippe Mathieu-Daudé <philmd@linaro.org> |
---|---|---|---|
2 | 2 | ||
3 | Extend the existing conditional that generates INC/DEC, to also swap an | 3 | Rather than manually copying each register, use |
4 | ADD for a SUB and vice versa when the immediate is 128. This facilitates | 4 | the libc memcpy(), which is well optimized nowadays. |
5 | using OPC_ARITH_EvIb instead of OPC_ARITH_EvIz. | ||
6 | 5 | ||
7 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | 6 | Suggested-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> |
8 | Message-Id: <20231228120514.70205-1-pbonzini@redhat.com> | 7 | Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> |
9 | [rth: Use a switch on C] | 8 | Reviewed-by: Richard Henderson <richard.henderson@linaro.org> |
9 | Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
10 | Message-ID: <20241205205418.67613-1-philmd@linaro.org> | ||
10 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> | 11 | Signed-off-by: Richard Henderson <richard.henderson@linaro.org> |
11 | --- | 12 | --- |
12 | tcg/i386/tcg-target.c.inc | 49 +++++++++++++++++++++++++++------------ | 13 | target/sparc/win_helper.c | 26 ++++++++------------------ |
13 | 1 file changed, 34 insertions(+), 15 deletions(-) | 14 | 1 file changed, 8 insertions(+), 18 deletions(-) |
14 | 15 | ||
15 | diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc | 16 | diff --git a/target/sparc/win_helper.c b/target/sparc/win_helper.c |
16 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/tcg/i386/tcg-target.c.inc | 18 | --- a/target/sparc/win_helper.c |
18 | +++ b/tcg/i386/tcg-target.c.inc | 19 | +++ b/target/sparc/win_helper.c |
19 | @@ -XXX,XX +XXX,XX @@ static void tgen_arithi(TCGContext *s, int c, int r0, | 20 | @@ -XXX,XX +XXX,XX @@ |
20 | c &= 7; | 21 | #include "exec/helper-proto.h" |
22 | #include "trace.h" | ||
23 | |||
24 | -static inline void memcpy32(target_ulong *dst, const target_ulong *src) | ||
25 | -{ | ||
26 | - dst[0] = src[0]; | ||
27 | - dst[1] = src[1]; | ||
28 | - dst[2] = src[2]; | ||
29 | - dst[3] = src[3]; | ||
30 | - dst[4] = src[4]; | ||
31 | - dst[5] = src[5]; | ||
32 | - dst[6] = src[6]; | ||
33 | - dst[7] = src[7]; | ||
34 | -} | ||
35 | - | ||
36 | void cpu_set_cwp(CPUSPARCState *env, int new_cwp) | ||
37 | { | ||
38 | /* put the modified wrap registers at their proper location */ | ||
39 | if (env->cwp == env->nwindows - 1) { | ||
40 | - memcpy32(env->regbase, env->regbase + env->nwindows * 16); | ||
41 | + memcpy(env->regbase, env->regbase + env->nwindows * 16, | ||
42 | + sizeof(env->gregs)); | ||
21 | } | 43 | } |
22 | 44 | env->cwp = new_cwp; | |
23 | - /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce | 45 | |
24 | - partial flags update stalls on Pentium4 and are not recommended | 46 | /* put the wrap registers at their temporary location */ |
25 | - by current Intel optimization manuals. */ | 47 | if (new_cwp == env->nwindows - 1) { |
26 | - if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) { | 48 | - memcpy32(env->regbase + env->nwindows * 16, env->regbase); |
27 | - int is_inc = (c == ARITH_ADD) ^ (val < 0); | 49 | + memcpy(env->regbase + env->nwindows * 16, env->regbase, |
28 | - if (TCG_TARGET_REG_BITS == 64) { | 50 | + sizeof(env->gregs)); |
29 | - /* The single-byte increment encodings are re-tasked as the | ||
30 | - REX prefixes. Use the MODRM encoding. */ | ||
31 | - tcg_out_modrm(s, OPC_GRP5 + rexw, | ||
32 | - (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0); | ||
33 | - } else { | ||
34 | - tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0); | ||
35 | + switch (c) { | ||
36 | + case ARITH_ADD: | ||
37 | + case ARITH_SUB: | ||
38 | + if (!cf) { | ||
39 | + /* | ||
40 | + * ??? While INC is 2 bytes shorter than ADDL $1, they also induce | ||
41 | + * partial flags update stalls on Pentium4 and are not recommended | ||
42 | + * by current Intel optimization manuals. | ||
43 | + */ | ||
44 | + if (val == 1 || val == -1) { | ||
45 | + int is_inc = (c == ARITH_ADD) ^ (val < 0); | ||
46 | + if (TCG_TARGET_REG_BITS == 64) { | ||
47 | + /* | ||
48 | + * The single-byte increment encodings are re-tasked | ||
49 | + * as the REX prefixes. Use the MODRM encoding. | ||
50 | + */ | ||
51 | + tcg_out_modrm(s, OPC_GRP5 + rexw, | ||
52 | + (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0); | ||
53 | + } else { | ||
54 | + tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0); | ||
55 | + } | ||
56 | + return; | ||
57 | + } | ||
58 | + if (val == 128) { | ||
59 | + /* | ||
60 | + * Facilitate using an 8-bit immediate. Carry is inverted | ||
61 | + * by this transformation, so do it only if cf == 0. | ||
62 | + */ | ||
63 | + c ^= ARITH_ADD ^ ARITH_SUB; | ||
64 | + val = -128; | ||
65 | + } | ||
66 | } | ||
67 | - return; | ||
68 | - } | ||
69 | + break; | ||
70 | |||
71 | - if (c == ARITH_AND) { | ||
72 | + case ARITH_AND: | ||
73 | if (TCG_TARGET_REG_BITS == 64) { | ||
74 | if (val == 0xffffffffu) { | ||
75 | tcg_out_ext32u(s, r0, r0); | ||
76 | @@ -XXX,XX +XXX,XX @@ static void tgen_arithi(TCGContext *s, int c, int r0, | ||
77 | tcg_out_ext16u(s, r0, r0); | ||
78 | return; | ||
79 | } | ||
80 | + break; | ||
81 | } | 51 | } |
82 | 52 | env->regwptr = env->regbase + (new_cwp * 16); | |
83 | if (val == (int8_t)val) { | 53 | } |
54 | @@ -XXX,XX +XXX,XX @@ void cpu_gl_switch_gregs(CPUSPARCState *env, uint32_t new_gl) | ||
55 | dst = get_gl_gregset(env, env->gl); | ||
56 | |||
57 | if (src != dst) { | ||
58 | - memcpy32(dst, env->gregs); | ||
59 | - memcpy32(env->gregs, src); | ||
60 | + memcpy(dst, env->gregs, sizeof(env->gregs)); | ||
61 | + memcpy(env->gregs, src, sizeof(env->gregs)); | ||
62 | } | ||
63 | } | ||
64 | |||
65 | @@ -XXX,XX +XXX,XX @@ void cpu_change_pstate(CPUSPARCState *env, uint32_t new_pstate) | ||
66 | /* Switch global register bank */ | ||
67 | src = get_gregset(env, new_pstate_regs); | ||
68 | dst = get_gregset(env, pstate_regs); | ||
69 | - memcpy32(dst, env->gregs); | ||
70 | - memcpy32(env->gregs, src); | ||
71 | + memcpy(dst, env->gregs, sizeof(env->gregs)); | ||
72 | + memcpy(env->gregs, src, sizeof(env->gregs)); | ||
73 | } else { | ||
74 | trace_win_helper_no_switch_pstate(new_pstate_regs); | ||
75 | } | ||
84 | -- | 76 | -- |
85 | 2.34.1 | 77 | 2.43.0 |
78 | |||
79 | diff view generated by jsdifflib |