Instead of copying a constant into a temporary with tcg_gen_dupi_vec,
use a vector constant from tcg_constant_vec_matching directly.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/translate-sve.c | 128 +++++++++++++--------------------
1 file changed, 49 insertions(+), 79 deletions(-)
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 9e2536dfe9..49d32fabc9 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -6081,9 +6081,9 @@ static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
if (top) {
if (shl == halfbits) {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_and_vec(vece, d, n,
+ tcg_constant_vec_matching(d, vece,
+ MAKE_64BIT_MASK(halfbits, halfbits)));
} else {
tcg_gen_sari_vec(vece, d, n, halfbits);
tcg_gen_shli_vec(vece, d, d, shl);
@@ -6138,18 +6138,18 @@ static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
if (top) {
if (shl == halfbits) {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_and_vec(vece, d, n,
+ tcg_constant_vec_matching(d, vece,
+ MAKE_64BIT_MASK(halfbits, halfbits)));
} else {
tcg_gen_shri_vec(vece, d, n, halfbits);
tcg_gen_shli_vec(vece, d, d, shl);
}
} else {
if (shl == 0) {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_and_vec(vece, d, n,
+ tcg_constant_vec_matching(d, vece,
+ MAKE_64BIT_MASK(0, halfbits)));
} else {
tcg_gen_shli_vec(vece, d, n, halfbits);
tcg_gen_shri_vec(vece, d, d, halfbits - shl);
@@ -6317,18 +6317,14 @@ static const TCGOpcode sqxtn_list[] = {
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t mask = (1ull << halfbits) - 1;
int64_t min = -1ull << (halfbits - 1);
int64_t max = -min - 1;
- tcg_gen_dupi_vec(vece, t, min);
- tcg_gen_smax_vec(vece, d, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_smin_vec(vece, d, d, t);
- tcg_gen_dupi_vec(vece, t, mask);
- tcg_gen_and_vec(vece, d, d, t);
+ tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, min));
+ tcg_gen_smin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max));
+ tcg_gen_and_vec(vece, d, d, tcg_constant_vec_matching(d, vece, mask));
}
static const GVecGen2 sqxtnb_ops[3] = {
@@ -6349,19 +6345,15 @@ TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t mask = (1ull << halfbits) - 1;
int64_t min = -1ull << (halfbits - 1);
int64_t max = -min - 1;
- tcg_gen_dupi_vec(vece, t, min);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_smin_vec(vece, n, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
+ tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_dupi_vec(vece, t, mask);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
}
static const GVecGen2 sqxtnt_ops[3] = {
@@ -6389,12 +6381,10 @@ static const TCGOpcode uqxtn_list[] = {
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_umin_vec(vece, d, n, t);
+ tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
}
static const GVecGen2 uqxtnb_ops[3] = {
@@ -6415,14 +6405,13 @@ TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)
static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
+ TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const GVecGen2 uqxtnt_ops[3] = {
@@ -6450,14 +6439,11 @@ static const TCGOpcode sqxtun_list[] = {
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
- tcg_gen_dupi_vec(vece, t, 0);
- tcg_gen_smax_vec(vece, d, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_umin_vec(vece, d, d, t);
+ tcg_gen_smax_vec(vece, d, n, tcg_constant_vec_matching(d, vece, 0));
+ tcg_gen_umin_vec(vece, d, d, tcg_constant_vec_matching(d, vece, max));
}
static const GVecGen2 sqxtunb_ops[3] = {
@@ -6478,16 +6464,14 @@ TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = (1ull << halfbits) - 1;
+ TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
- tcg_gen_dupi_vec(vece, t, 0);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
+ tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const GVecGen2 sqxtunt_ops[3] = {
@@ -6551,13 +6535,11 @@ static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_shri_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, mask);
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask));
}
static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
@@ -6609,13 +6591,11 @@ static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_shli_vec(vece, n, n, halfbits - shr);
- tcg_gen_dupi_vec(vece, t, mask);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
}
static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
@@ -6658,14 +6638,12 @@ TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
+ uint64_t max = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_sari_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, 0);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_umin_vec(vece, d, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
+ tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
}
static const TCGOpcode sqshrunb_vec_list[] = {
@@ -6690,16 +6668,15 @@ TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)
static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
+ uint64_t max = MAKE_64BIT_MASK(0, halfbits);
+ TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
tcg_gen_sari_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, 0);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, 0));
+ tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const TCGOpcode sqshrunt_vec_list[] = {
@@ -6742,18 +6719,15 @@ TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)
static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
int64_t min = -max - 1;
+ int64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_sari_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, min);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_smin_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_and_vec(vece, d, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
+ tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
+ tcg_gen_and_vec(vece, d, n, tcg_constant_vec_matching(d, vece, mask));
}
static const TCGOpcode sqshrnb_vec_list[] = {
@@ -6778,19 +6752,16 @@ TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)
static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
int64_t min = -max - 1;
+ int64_t mask = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_sari_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, min);
- tcg_gen_smax_vec(vece, n, n, t);
- tcg_gen_dupi_vec(vece, t, max);
- tcg_gen_smin_vec(vece, n, n, t);
+ tcg_gen_smax_vec(vece, n, n, tcg_constant_vec_matching(d, vece, min));
+ tcg_gen_smin_vec(vece, n, n, tcg_constant_vec_matching(d, vece, max));
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, tcg_constant_vec_matching(d, vece, mask), d, n);
}
static const TCGOpcode sqshrnt_vec_list[] = {
@@ -6833,12 +6804,11 @@ TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
+ int64_t max = MAKE_64BIT_MASK(0, halfbits);
tcg_gen_shri_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_umin_vec(vece, d, n, t);
+ tcg_gen_umin_vec(vece, d, n, tcg_constant_vec_matching(d, vece, max));
}
static const TCGOpcode uqshrnb_vec_list[] = {
@@ -6863,14 +6833,14 @@ TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)
static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
TCGv_vec n, int64_t shr)
{
- TCGv_vec t = tcg_temp_new_vec_matching(d);
int halfbits = 4 << vece;
+ int64_t max = MAKE_64BIT_MASK(0, halfbits);
+ TCGv_vec maxv = tcg_constant_vec_matching(d, vece, max);
tcg_gen_shri_vec(vece, n, n, shr);
- tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
- tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_umin_vec(vece, n, n, maxv);
tcg_gen_shli_vec(vece, n, n, halfbits);
- tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_gen_bitsel_vec(vece, d, maxv, d, n);
}
static const TCGOpcode uqshrnt_vec_list[] = {
--
2.43.0