25.06.2024 21:35, Richard Henderson wrote:
> The inner loop, bounded by eltspersegment, must not be
> larger than the outer loop, bounded by elements.
>
> Cc: qemu-stable@nongnu.org
> Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/arm/tcg/vec_helper.c | 24 ++++++++++++++++--------
> 1 file changed, 16 insertions(+), 8 deletions(-)
If I understand correctly, this patch
Fixes: f80701cb44d3 ("target/arm: Convert SQDMULH, SQRDMULH to decodetree")
and the issue did not exist before that commit was applied.
Is that right?
Thanks,
/mjt
> diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
> index 7b34cc98af..d477479bb1 100644
> --- a/target/arm/tcg/vec_helper.c
> +++ b/target/arm/tcg/vec_helper.c
> @@ -317,10 +317,12 @@ void HELPER(neon_sqdmulh_idx_h)(void *vd, void *vn, void *vm,
> intptr_t i, j, opr_sz = simd_oprsz(desc);
> int idx = simd_data(desc);
> int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx);
> + intptr_t elements = opr_sz / 2;
> + intptr_t eltspersegment = MIN(16 / 2, elements);
>
> - for (i = 0; i < opr_sz / 2; i += 16 / 2) {
> + for (i = 0; i < elements; i += 16 / 2) {
> int16_t mm = m[i];
> - for (j = 0; j < 16 / 2; ++j) {
> + for (j = 0; j < eltspersegment; ++j) {
> d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, false, vq);
> }
> }
> @@ -333,10 +335,12 @@ void HELPER(neon_sqrdmulh_idx_h)(void *vd, void *vn, void *vm,
> intptr_t i, j, opr_sz = simd_oprsz(desc);
> int idx = simd_data(desc);
> int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx);
> + intptr_t elements = opr_sz / 2;
> + intptr_t eltspersegment = MIN(16 / 2, elements);
>
> - for (i = 0; i < opr_sz / 2; i += 16 / 2) {
> + for (i = 0; i < elements; i += 16 / 2) {
> int16_t mm = m[i];
> - for (j = 0; j < 16 / 2; ++j) {
> + for (j = 0; j < eltspersegment; ++j) {
> d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, true, vq);
> }
> }
> @@ -512,10 +516,12 @@ void HELPER(neon_sqdmulh_idx_s)(void *vd, void *vn, void *vm,
> intptr_t i, j, opr_sz = simd_oprsz(desc);
> int idx = simd_data(desc);
> int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx);
> + intptr_t elements = opr_sz / 4;
> + intptr_t eltspersegment = MIN(16 / 4, elements);
>
> - for (i = 0; i < opr_sz / 4; i += 16 / 4) {
> + for (i = 0; i < elements; i += 16 / 4) {
> int32_t mm = m[i];
> - for (j = 0; j < 16 / 4; ++j) {
> + for (j = 0; j < eltspersegment; ++j) {
> d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, false, vq);
> }
> }
> @@ -528,10 +534,12 @@ void HELPER(neon_sqrdmulh_idx_s)(void *vd, void *vn, void *vm,
> intptr_t i, j, opr_sz = simd_oprsz(desc);
> int idx = simd_data(desc);
> int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx);
> + intptr_t elements = opr_sz / 4;
> + intptr_t eltspersegment = MIN(16 / 4, elements);
>
> - for (i = 0; i < opr_sz / 4; i += 16 / 4) {
> + for (i = 0; i < elements; i += 16 / 4) {
> int32_t mm = m[i];
> - for (j = 0; j < 16 / 4; ++j) {
> + for (j = 0; j < eltspersegment; ++j) {
> d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, true, vq);
> }
> }
--
GPG Key transition (from rsa2048 to rsa4096) since 2024-04-24.
New key: rsa4096/61AD3D98ECDF2C8E 9D8B E14E 3F2A 9DD7 9199 28F1 61AD 3D98 ECDF 2C8E
Old key: rsa2048/457CE0A0804465C5 6EE1 95D1 886E 8FFB 810D 4324 457C E0A0 8044 65C5
Transition statement: http://www.corpit.ru/mjt/gpg-transition-2024.txt