Do not explicitly store zero to the NEON high part
when we can pass !is_q to clear_vec_high.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/translate-a64.c | 59 +++++++++++++++++++++++---------------
1 file changed, 36 insertions(+), 23 deletions(-)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 729e746e25..d1c9150c4f 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -939,11 +939,10 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
/* This always zero-extends and writes to a full 128 bit wide vector */
TCGv_i64 tmplo = tcg_temp_new_i64();
- TCGv_i64 tmphi;
+ TCGv_i64 tmphi = NULL;
if (size < 4) {
MemOp memop = s->be_data + size;
- tmphi = tcg_const_i64(0);
tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
} else {
bool be = s->be_data == MO_BE;
@@ -961,12 +960,13 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
}
tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
- tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
-
tcg_temp_free_i64(tmplo);
- tcg_temp_free_i64(tmphi);
- clear_vec_high(s, true, destidx);
+ if (tmphi) {
+ tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
+ tcg_temp_free_i64(tmphi);
+ }
+ clear_vec_high(s, tmphi != NULL, destidx);
}
/*
@@ -6960,8 +6960,8 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
return;
}
- tcg_resh = tcg_temp_new_i64();
tcg_resl = tcg_temp_new_i64();
+ tcg_resh = NULL;
/* Vd gets bits starting at pos bits into Vm:Vn. This is
* either extracting 128 bits from a 128:128 concatenation, or
@@ -6973,7 +6973,6 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
read_vec_element(s, tcg_resh, rm, 0, MO_64);
do_ext64(s, tcg_resh, tcg_resl, pos);
}
- tcg_gen_movi_i64(tcg_resh, 0);
} else {
TCGv_i64 tcg_hh;
typedef struct {
@@ -6988,6 +6987,7 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
pos -= 64;
}
+ tcg_resh = tcg_temp_new_i64();
read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
elt++;
read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
@@ -7003,9 +7003,12 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
write_vec_element(s, tcg_resl, rd, 0, MO_64);
tcg_temp_free_i64(tcg_resl);
- write_vec_element(s, tcg_resh, rd, 1, MO_64);
- tcg_temp_free_i64(tcg_resh);
- clear_vec_high(s, true, rd);
+
+ if (is_q) {
+ write_vec_element(s, tcg_resh, rd, 1, MO_64);
+ tcg_temp_free_i64(tcg_resh);
+ }
+ clear_vec_high(s, is_q, rd);
}
/* TBL/TBX
@@ -7042,17 +7045,21 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn)
* the input.
*/
tcg_resl = tcg_temp_new_i64();
- tcg_resh = tcg_temp_new_i64();
+ tcg_resh = NULL;
if (is_tblx) {
read_vec_element(s, tcg_resl, rd, 0, MO_64);
} else {
tcg_gen_movi_i64(tcg_resl, 0);
}
- if (is_tblx && is_q) {
- read_vec_element(s, tcg_resh, rd, 1, MO_64);
- } else {
- tcg_gen_movi_i64(tcg_resh, 0);
+
+ if (is_q) {
+ tcg_resh = tcg_temp_new_i64();
+ if (is_tblx) {
+ read_vec_element(s, tcg_resh, rd, 1, MO_64);
+ } else {
+ tcg_gen_movi_i64(tcg_resh, 0);
+ }
}
tcg_idx = tcg_temp_new_i64();
@@ -7072,9 +7079,12 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn)
write_vec_element(s, tcg_resl, rd, 0, MO_64);
tcg_temp_free_i64(tcg_resl);
- write_vec_element(s, tcg_resh, rd, 1, MO_64);
- tcg_temp_free_i64(tcg_resh);
- clear_vec_high(s, true, rd);
+
+ if (is_q) {
+ write_vec_element(s, tcg_resh, rd, 1, MO_64);
+ tcg_temp_free_i64(tcg_resh);
+ }
+ clear_vec_high(s, is_q, rd);
}
/* ZIP/UZP/TRN
@@ -7111,7 +7121,7 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
}
tcg_resl = tcg_const_i64(0);
- tcg_resh = tcg_const_i64(0);
+ tcg_resh = is_q ? tcg_const_i64(0) : NULL;
tcg_res = tcg_temp_new_i64();
for (i = 0; i < elements; i++) {
@@ -7162,9 +7172,12 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
write_vec_element(s, tcg_resl, rd, 0, MO_64);
tcg_temp_free_i64(tcg_resl);
- write_vec_element(s, tcg_resh, rd, 1, MO_64);
- tcg_temp_free_i64(tcg_resh);
- clear_vec_high(s, true, rd);
+
+ if (is_q) {
+ write_vec_element(s, tcg_resh, rd, 1, MO_64);
+ tcg_temp_free_i64(tcg_resh);
+ }
+ clear_vec_high(s, is_q, rd);
}
/*
--
2.20.1
On 5/4/20 9:23 PM, Richard Henderson wrote:
> Do not explicitly store zero to the NEON high part
> when we can pass !is_q to clear_vec_high.
>
> Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
This patch is easier to review with 'git diff --function-context'.
> target/arm/translate-a64.c | 59 +++++++++++++++++++++++---------------
> 1 file changed, 36 insertions(+), 23 deletions(-)
>
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 729e746e25..d1c9150c4f 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -939,11 +939,10 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
> {
> /* This always zero-extends and writes to a full 128 bit wide vector */
> TCGv_i64 tmplo = tcg_temp_new_i64();
> - TCGv_i64 tmphi;
> + TCGv_i64 tmphi = NULL;
>
> if (size < 4) {
> MemOp memop = s->be_data + size;
> - tmphi = tcg_const_i64(0);
> tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
> } else {
> bool be = s->be_data == MO_BE;
> @@ -961,12 +960,13 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
> }
>
> tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
> - tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
> -
> tcg_temp_free_i64(tmplo);
> - tcg_temp_free_i64(tmphi);
>
> - clear_vec_high(s, true, destidx);
> + if (tmphi) {
> + tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
> + tcg_temp_free_i64(tmphi);
> + }
> + clear_vec_high(s, tmphi != NULL, destidx);
OK.
> }
>
> /*
> @@ -6960,8 +6960,8 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
> return;
> }
>
> - tcg_resh = tcg_temp_new_i64();
> tcg_resl = tcg_temp_new_i64();
> + tcg_resh = NULL;
>
> /* Vd gets bits starting at pos bits into Vm:Vn. This is
> * either extracting 128 bits from a 128:128 concatenation, or
> @@ -6973,7 +6973,6 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
> read_vec_element(s, tcg_resh, rm, 0, MO_64);
but then tcg_resh, passed to read_vec_element() on the line just above, is NULL...
> do_ext64(s, tcg_resh, tcg_resl, pos);
> }
> - tcg_gen_movi_i64(tcg_resh, 0);
> } else {
> TCGv_i64 tcg_hh;
> typedef struct {
> @@ -6988,6 +6987,7 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
> pos -= 64;
> }
>
> + tcg_resh = tcg_temp_new_i64();
> read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
> elt++;
> read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
> @@ -7003,9 +7003,12 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
>
> write_vec_element(s, tcg_resl, rd, 0, MO_64);
> tcg_temp_free_i64(tcg_resl);
> - write_vec_element(s, tcg_resh, rd, 1, MO_64);
> - tcg_temp_free_i64(tcg_resh);
> - clear_vec_high(s, true, rd);
> +
> + if (is_q) {
> + write_vec_element(s, tcg_resh, rd, 1, MO_64);
> + tcg_temp_free_i64(tcg_resh);
> + }
> + clear_vec_high(s, is_q, rd);
> }
>
> /* TBL/TBX
> @@ -7042,17 +7045,21 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn)
> * the input.
> */
> tcg_resl = tcg_temp_new_i64();
> - tcg_resh = tcg_temp_new_i64();
> + tcg_resh = NULL;
>
> if (is_tblx) {
> read_vec_element(s, tcg_resl, rd, 0, MO_64);
> } else {
> tcg_gen_movi_i64(tcg_resl, 0);
> }
> - if (is_tblx && is_q) {
> - read_vec_element(s, tcg_resh, rd, 1, MO_64);
> - } else {
> - tcg_gen_movi_i64(tcg_resh, 0);
> +
> + if (is_q) {
> + tcg_resh = tcg_temp_new_i64();
> + if (is_tblx) {
> + read_vec_element(s, tcg_resh, rd, 1, MO_64);
> + } else {
> + tcg_gen_movi_i64(tcg_resh, 0);
> + }
> }
>
> tcg_idx = tcg_temp_new_i64();
> @@ -7072,9 +7079,12 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn)
>
> write_vec_element(s, tcg_resl, rd, 0, MO_64);
> tcg_temp_free_i64(tcg_resl);
> - write_vec_element(s, tcg_resh, rd, 1, MO_64);
> - tcg_temp_free_i64(tcg_resh);
> - clear_vec_high(s, true, rd);
> +
> + if (is_q) {
> + write_vec_element(s, tcg_resh, rd, 1, MO_64);
> + tcg_temp_free_i64(tcg_resh);
> + }
> + clear_vec_high(s, is_q, rd);
OK.
> }
>
> /* ZIP/UZP/TRN
> @@ -7111,7 +7121,7 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
> }
>
> tcg_resl = tcg_const_i64(0);
> - tcg_resh = tcg_const_i64(0);
> + tcg_resh = is_q ? tcg_const_i64(0) : NULL;
> tcg_res = tcg_temp_new_i64();
>
> for (i = 0; i < elements; i++) {
> @@ -7162,9 +7172,12 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
More context:
...
ofs = i * esize;
if (ofs < 64) {
tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
} else {
tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
here tcg_resh, used by tcg_gen_or_i64() on the line just above, is NULL too when !is_q.
}
}
tcg_temp_free_i64(tcg_res);
>
> write_vec_element(s, tcg_resl, rd, 0, MO_64);
> tcg_temp_free_i64(tcg_resl);
> - write_vec_element(s, tcg_resh, rd, 1, MO_64);
> - tcg_temp_free_i64(tcg_resh);
> - clear_vec_high(s, true, rd);
> +
> + if (is_q) {
> + write_vec_element(s, tcg_resh, rd, 1, MO_64);
> + tcg_temp_free_i64(tcg_resh);
> + }
> + clear_vec_high(s, is_q, rd);
> }
>
> /*
>
On 5/4/20 11:09 PM, Philippe Mathieu-Daudé wrote:
>> @@ -7111,7 +7121,7 @@ static void disas_simd_zip_trn
>> }
>> tcg_resl = tcg_const_i64(0);
>> - tcg_resh = tcg_const_i64(0);
>> + tcg_resh = is_q ? tcg_const_i64(0) : NULL;
>> tcg_res = tcg_temp_new_i64();
>> for (i = 0; i < elements; i++) {
>> @@ -7162,9 +7172,12 @@ static void disas_simd_zip_trn(DisasContext *s,
>> uint32_t insn)
>
> More context:
>
> ...
> ofs = i * esize;
> if (ofs < 64) {
> tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
> tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
> } else {
> tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
> tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
>
> here ^^^^^^^^ tcg_resh is NULL too.
>
> }
> }
When is_q is false, the vector length is 64 bits, so ofs is always less than 64. Thus that line is not reachable.
r~
On Tue, May 5, 2020 at 4:22 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 5/4/20 11:09 PM, Philippe Mathieu-Daudé wrote:
> >> @@ -7111,7 +7121,7 @@ static void disas_simd_zip_trn
> >> }
> >> tcg_resl = tcg_const_i64(0);
> >> - tcg_resh = tcg_const_i64(0);
> >> + tcg_resh = is_q ? tcg_const_i64(0) : NULL;
> >> tcg_res = tcg_temp_new_i64();
> >> for (i = 0; i < elements; i++) {
> >> @@ -7162,9 +7172,12 @@ static void disas_simd_zip_trn(DisasContext *s,
> >> uint32_t insn)
> >
> > More context:
> >
> > ...
> > ofs = i * esize;
> > if (ofs < 64) {
> > tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
> > tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
> > } else {
> > tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
> > tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
> >
> > here ^^^^^^^^ tcg_resh is NULL too.
> >
> > }
> > }
>
> When is_q is false, the vector length is 64. Thus that line is not reachable.
OK.
>
>
> r~
© 2016 - 2025 Red Hat, Inc.