Honor AlignmentEnforced() for LDR/STR (vector),
(predicate), and (array vector).
Within the expansion functions, clear @align when we're
done emitting loads at the largest size.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/translate-a64.h | 6 ++--
target/arm/tcg/translate-sme.c | 5 ++--
target/arm/tcg/translate-sve.c | 50 ++++++++++++++++++++++++----------
3 files changed, 42 insertions(+), 19 deletions(-)
diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h
index b2420f59eb..8a9f4b2053 100644
--- a/target/arm/tcg/translate-a64.h
+++ b/target/arm/tcg/translate-a64.h
@@ -225,7 +225,9 @@ void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs,
uint32_t rn_ofs, uint32_t rm_ofs,
uint32_t opr_sz, uint32_t max_sz);
-void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
-void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
+void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs,
+ int len, int rn, int imm, MemOp align);
+void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs,
+ int len, int rn, int imm, MemOp align);
#endif /* TARGET_ARM_TRANSLATE_A64_H */
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
index 246f191eca..1dbd0199af 100644
--- a/target/arm/tcg/translate-sme.c
+++ b/target/arm/tcg/translate-sme.c
@@ -269,7 +269,7 @@ static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
return true;
}
-typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int);
+typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int, MemOp);
static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
{
@@ -284,7 +284,8 @@ static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
/* ZA[n] equates to ZA0H.B[n]. */
base = get_tile_rowcol(s, MO_8, a->rv, imm, false);
- fn(s, base, 0, svl, a->rn, imm * svl);
+ fn(s, base, 0, svl, a->rn, imm * svl,
+ s->align_mem ? MO_ALIGN_16 : MO_UNALN);
return true;
}
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 8403034a0e..d2cd298b5b 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -4160,7 +4160,7 @@ TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
*/
void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
- int len, int rn, int imm)
+ int len, int rn, int imm, MemOp align)
{
int len_align = QEMU_ALIGN_DOWN(len, 16);
int len_remain = len % 16;
@@ -4189,12 +4189,15 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
for (i = 0; i < len_align; i += 16) {
tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
- MO_LE | MO_128 | MO_ATOM_NONE);
+ MO_LE | MO_128 | MO_ATOM_NONE | align);
tcg_gen_extr_i128_i64(t0, t1, t16);
tcg_gen_st_i64(t0, base, vofs + i);
tcg_gen_st_i64(t1, base, vofs + i + 8);
tcg_gen_addi_i64(clean_addr, clean_addr, 16);
}
+ if (len_align) {
+ align = MO_UNALN;
+ }
} else {
TCGLabel *loop = gen_new_label();
TCGv_ptr tp, i = tcg_temp_new_ptr();
@@ -4204,7 +4207,7 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
t16 = tcg_temp_new_i128();
tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
- MO_LE | MO_128 | MO_ATOM_NONE);
+ MO_LE | MO_128 | MO_ATOM_NONE | align);
tcg_gen_addi_i64(clean_addr, clean_addr, 16);
tp = tcg_temp_new_ptr();
@@ -4219,6 +4222,7 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
tcg_gen_st_i64(t1, tp, vofs + 8);
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
+ align = MO_UNALN;
}
/*
@@ -4227,7 +4231,9 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
*/
if (len_remain >= 8) {
t0 = tcg_temp_new_i64();
- tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE);
+ tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
+ MO_LEUQ | MO_ATOM_NONE | align);
+ align = MO_UNALN;
tcg_gen_st_i64(t0, base, vofs + len_align);
len_remain -= 8;
len_align += 8;
@@ -4242,12 +4248,14 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
case 4:
case 8:
tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
- MO_LE | ctz32(len_remain) | MO_ATOM_NONE);
+ MO_LE | ctz32(len_remain)
+ | MO_ATOM_NONE | align);
break;
case 6:
t1 = tcg_temp_new_i64();
- tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE);
+ tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
+ MO_LEUL | MO_ATOM_NONE | align);
tcg_gen_addi_i64(clean_addr, clean_addr, 4);
tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
@@ -4262,7 +4270,7 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
/* Similarly for stores. */
void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
- int len, int rn, int imm)
+ int len, int rn, int imm, MemOp align)
{
int len_align = QEMU_ALIGN_DOWN(len, 16);
int len_remain = len % 16;
@@ -4294,9 +4302,12 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
tcg_gen_ld_i64(t1, base, vofs + i + 8);
tcg_gen_concat_i64_i128(t16, t0, t1);
tcg_gen_qemu_st_i128(t16, clean_addr, midx,
- MO_LE | MO_128 | MO_ATOM_NONE);
+ MO_LE | MO_128 | MO_ATOM_NONE | align);
tcg_gen_addi_i64(clean_addr, clean_addr, 16);
}
+ if (len_align) {
+ align = MO_UNALN;
+ }
} else {
TCGLabel *loop = gen_new_label();
TCGv_ptr tp, i = tcg_temp_new_ptr();
@@ -4320,13 +4331,16 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
tcg_gen_addi_i64(clean_addr, clean_addr, 16);
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
+ align = MO_UNALN;
}
/* Predicate register stores can be any multiple of 2. */
if (len_remain >= 8) {
t0 = tcg_temp_new_i64();
tcg_gen_ld_i64(t0, base, vofs + len_align);
- tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE);
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx,
+ MO_LEUQ | MO_ATOM_NONE | align);
+ align = MO_UNALN;
len_remain -= 8;
len_align += 8;
if (len_remain) {
@@ -4342,11 +4356,13 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
case 4:
case 8:
tcg_gen_qemu_st_i64(t0, clean_addr, midx,
- MO_LE | ctz32(len_remain) | MO_ATOM_NONE);
+ MO_LE | ctz32(len_remain)
+ | MO_ATOM_NONE | align);
break;
case 6:
- tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE);
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx,
+ MO_LEUL | MO_ATOM_NONE | align);
tcg_gen_addi_i64(clean_addr, clean_addr, 4);
tcg_gen_shri_i64(t0, t0, 32);
tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
@@ -4366,7 +4382,8 @@ static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
if (sve_access_check(s)) {
int size = vec_full_reg_size(s);
int off = vec_full_reg_offset(s, a->rd);
- gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size);
+ gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size,
+ s->align_mem ? MO_ALIGN_16 : MO_UNALN);
}
return true;
}
@@ -4379,7 +4396,8 @@ static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
if (sve_access_check(s)) {
int size = pred_full_reg_size(s);
int off = pred_full_reg_offset(s, a->rd);
- gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size);
+ gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size,
+ s->align_mem ? MO_ALIGN_2 : MO_UNALN);
}
return true;
}
@@ -4392,7 +4410,8 @@ static bool trans_STR_zri(DisasContext *s, arg_rri *a)
if (sve_access_check(s)) {
int size = vec_full_reg_size(s);
int off = vec_full_reg_offset(s, a->rd);
- gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size);
+ gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size,
+ s->align_mem ? MO_ALIGN_16 : MO_UNALN);
}
return true;
}
@@ -4405,7 +4424,8 @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a)
if (sve_access_check(s)) {
int size = pred_full_reg_size(s);
int off = pred_full_reg_offset(s, a->rd);
- gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size);
+ gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size,
+ s->align_mem ? MO_ALIGN_2 : MO_UNALN);
}
return true;
}
--
2.43.0