From nobody Tue Feb 10 20:14:22 2026
Delivered-To: importer@patchew.org
Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as
 permitted sender) client-ip=208.118.235.17;
 envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org;
 helo=lists.gnu.org;
Authentication-Results: mx.zohomail.com;
	dkim=fail;
	spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted
 sender)  smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org;
	dmarc=fail(p=none dis=none)  header.from=linaro.org
Return-Path: <qemu-devel-bounces+importer=patchew.org@nongnu.org>
Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by
 mx.zohomail.com
	with SMTPS id 1518304373580502.40881345972446;
 Sat, 10 Feb 2018 15:12:53 -0800 (PST)
Received: from localhost ([::1]:46306 helo=lists.gnu.org)
	by lists.gnu.org with esmtp (Exim 4.71)
	(envelope-from <qemu-devel-bounces+importer=patchew.org@nongnu.org>)
	id 1ekeKC-0003SX-Ki
	for importer@patchew.org; Sat, 10 Feb 2018 18:12:52 -0500
Received: from eggs.gnu.org ([2001:4830:134:3::10]:58773)
	by lists.gnu.org with esmtp (Exim 4.71)
	(envelope-from <richard.henderson@linaro.org>) id 1ekeDL-0006eW-W3
	for qemu-devel@nongnu.org; Sat, 10 Feb 2018 18:05:52 -0500
Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71)
	(envelope-from <richard.henderson@linaro.org>) id 1ekeDG-0008Aa-3Y
	for qemu-devel@nongnu.org; Sat, 10 Feb 2018 18:05:47 -0500
Received: from mail-pg0-x241.google.com ([2607:f8b0:400e:c05::241]:32944)
	by eggs.gnu.org with esmtps (TLS1.0:RSA_AES_128_CBC_SHA1:16)
	(Exim 4.71) (envelope-from <richard.henderson@linaro.org>)
	id 1ekeDF-00089L-O5
	for qemu-devel@nongnu.org; Sat, 10 Feb 2018 18:05:41 -0500
Received: by mail-pg0-x241.google.com with SMTP id u1so5488643pgr.0
	for <qemu-devel@nongnu.org>; Sat, 10 Feb 2018 15:05:41 -0800 (PST)
Received: from cloudburst.twiddle.net (174-21-6-47.tukw.qwest.net.
	[174.21.6.47]) by smtp.gmail.com with ESMTPSA id
	w10sm14122374pgr.57.2018.02.10.15.05.38
	(version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256);
	Sat, 10 Feb 2018 15:05:39 -0800 (PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linaro.org; s=google;
	h=from:to:cc:subject:date:message-id:in-reply-to:references;
	bh=0RZ/Dwu85IRJts+xxg49wprCBup0YeyQq2OHAWxr7S4=;
	b=jEnZiYXJB+t/RICbmVxBMRr8hio7Zzfw+/NhrIkVATu/DRFlXLMaKbv19Scz+6+/7j
	pD7uFIkeOikmQO2KUa+m0kU6VNJfzbTlzItpZoJM6k35CysQGSUp2aWk+B71ofdHZl3Y
	Cu3UY0RclRVfPKPS2wCSGwomYk6K5E0crj9wE=
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
	d=1e100.net; s=20161025;
	h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to
	:references;
	bh=0RZ/Dwu85IRJts+xxg49wprCBup0YeyQq2OHAWxr7S4=;
	b=CF3zHwOnY5kxLSDkwa+NTS8RMuEIEnrWKEP1wv+hGMlhboYEbQ3tdOC8uMDg8mnH2C
	tX8GYIT3RtbHjIfdo6phXXA5m31owk0urVej6ai7/fCHlbXkEZJnPHSvNZE5AB75bn6s
	DO6SxKWYGdq+TCHh9FJ6wQ2oB929pFZnoAPyyWZe6/FLuGDEtMLhb8pGr3XemnUuqmjw
	bN6tZ5WsQ670LbGiDcGZELHkTEaKLmEC/AfZUF+YoBqDnS/38V4vupPPCIb11K+p94r/
	bmluUSpAPaPAcEDGue0ZtitwuiBXVq8epkvH0kr12y61cdWlV5V/NxAcaz8wZnlBnjZr
	eX0A==
X-Gm-Message-State: APf1xPDby8Ej6WaJm3KwQrtalkwgqRDWO2dWFm7IGTEwNejuEbB5BN9f
	hk7hs0hIgbhmInMWZADNZ3yotz8f19A=
X-Google-Smtp-Source: 
 AH8x224GIuMB/hrMHrliUfJ7xxs0xFLcMt0W84ZHnj/0F8Ku9Sb4Hozqp3mYh7Bh7KjeRXWLdJCQqA==
X-Received: by 10.101.71.129 with SMTP id e1mr5865207pgs.430.1518303940329;
	Sat, 10 Feb 2018 15:05:40 -0800 (PST)
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Date: Sat, 10 Feb 2018 15:05:30 -0800
Message-Id: <20180210230530.8421-6-richard.henderson@linaro.org>
X-Mailer: git-send-email 2.14.3
In-Reply-To: <20180210230530.8421-1-richard.henderson@linaro.org>
References: <20180210230530.8421-1-richard.henderson@linaro.org>
X-detected-operating-system: by eggs.gnu.org: Genre and OS details not
	recognized.
X-Received-From: 2607:f8b0:400e:c05::241
Subject: [Qemu-devel] [PATCH 5/5] target/arm: Handle SVE registers when
 using clear_vec_high
X-BeenThere: qemu-devel@nongnu.org
X-Mailman-Version: 2.1.21
Precedence: list
List-Id: <qemu-devel.nongnu.org>
List-Unsubscribe: <https://lists.nongnu.org/mailman/options/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>
List-Archive: <http://lists.nongnu.org/archive/html/qemu-devel/>
List-Post: <mailto:qemu-devel@nongnu.org>
List-Help: <mailto:qemu-devel-request@nongnu.org?subject=help>
List-Subscribe: <https://lists.nongnu.org/mailman/listinfo/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=subscribe>
Cc: peter.maydell@linaro.org, qemu-arm@nongnu.org
Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org
Sender: "Qemu-devel" <qemu-devel-bounces+importer=patchew.org@nongnu.org>
X-ZohoMail-DKIM: fail (Header signature does not verify)
X-ZohoMail: RDKM_2  RSF_0  Z_629925259 SPT_0
Content-Transfer-Encoding: quoted-printable
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"

When storing to an AdvSIMD FP register, all of the high
bits of the SVE register are zeroed.  Therefore, call
clear_vec_high more often, passing is_q as a parameter so
that it also clears the bits above the 128-bit vector when
SVE is enabled.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-a64.c | 162 +++++++++++++++++------------------------=
----
 1 file changed, 62 insertions(+), 100 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index e3881d4999..1c88539d62 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -602,13 +602,30 @@ static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
     return v;
 }
=20
+/* Clear the bits above an N-bit vector, for N =3D (is_q ? 128 : 64).
+ * If SVE is not enabled, then there are only 128 bits in the vector.
+ */
+static void clear_vec_high(DisasContext *s, bool is_q, int rd)
+{
+    unsigned ofs =3D fp_reg_offset(s, rd, MO_64);
+    unsigned vsz =3D vec_full_reg_size(s);
+
+    if (!is_q) {
+        TCGv_i64 tcg_zero =3D tcg_const_i64(0);
+        tcg_gen_st_i64(tcg_zero, cpu_env, ofs + 8);
+        tcg_temp_free_i64(tcg_zero);
+    }
+    if (vsz > 16) {
+        tcg_gen_gvec_dup8i(ofs + 16, vsz - 16, vsz - 16, 0);
+    }
+}
+
 static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 {
-    TCGv_i64 tcg_zero =3D tcg_const_i64(0);
+    unsigned ofs =3D fp_reg_offset(s, reg, MO_64);
=20
-    tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
-    tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
-    tcg_temp_free_i64(tcg_zero);
+    tcg_gen_st_i64(v, cpu_env, ofs);
+    clear_vec_high(s, false, reg);
 }
=20
 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
@@ -1009,6 +1026,8 @@ static void do_fp_ld(DisasContext *s, int destidx, TC=
Gv_i64 tcg_addr, int size)
=20
     tcg_temp_free_i64(tmplo);
     tcg_temp_free_i64(tmphi);
+
+    clear_vec_high(s, true, destidx);
 }
=20
 /*
@@ -1124,17 +1143,6 @@ static void write_vec_element_i32(DisasContext *s, T=
CGv_i32 tcg_src,
     }
 }
=20
-/* Clear the high 64 bits of a 128 bit vector (in general non-quad
- * vector ops all need to do this).
- */
-static void clear_vec_high(DisasContext *s, int rd)
-{
-    TCGv_i64 tcg_zero =3D tcg_const_i64(0);
-
-    write_vec_element(s, tcg_zero, rd, 1, MO_64);
-    tcg_temp_free_i64(tcg_zero);
-}
-
 /* Store from vector register to memory */
 static void do_vec_st(DisasContext *s, int srcidx, int element,
                       TCGv_i64 tcg_addr, int size)
@@ -2794,12 +2802,13 @@ static void disas_ldst_multiple_struct(DisasContext=
 *s, uint32_t insn)
                     /* For non-quad operations, setting a slice of the low
                      * 64 bits of the register clears the high 64 bits (in
                      * the ARM ARM pseudocode this is implicit in the fact
-                     * that 'rval' is a 64 bit wide variable). We optimize
-                     * by noticing that we only need to do this the first
-                     * time we touch a register.
+                     * that 'rval' is a 64 bit wide variable).
+                     * For quad operations, we might still need to zero the
+                     * high bits of SVE.  We optimize by noticing that we =
only
+                     * need to do this the first time we touch a register.
                      */
-                    if (!is_q && e =3D=3D 0 && (r =3D=3D 0 || xs =3D=3D se=
lem - 1)) {
-                        clear_vec_high(s, tt);
+                    if (e =3D=3D 0 && (r =3D=3D 0 || xs =3D=3D selem - 1))=
 {
+                        clear_vec_high(s, is_q, tt);
                     }
                 }
                 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
@@ -2942,10 +2951,9 @@ static void disas_ldst_single_struct(DisasContext *s=
, uint32_t insn)
             write_vec_element(s, tcg_tmp, rt, 0, MO_64);
             if (is_q) {
                 write_vec_element(s, tcg_tmp, rt, 1, MO_64);
-            } else {
-                clear_vec_high(s, rt);
             }
             tcg_temp_free_i64(tcg_tmp);
+            clear_vec_high(s, is_q, rt);
         } else {
             /* Load/store one element per register */
             if (is_load) {
@@ -6718,7 +6726,6 @@ static void handle_vec_simd_sqshrn(DisasContext *s, b=
ool is_scalar, bool is_q,
     }
=20
     if (!is_q) {
-        clear_vec_high(s, rd);
         write_vec_element(s, tcg_final, rd, 0, MO_64);
     } else {
         write_vec_element(s, tcg_final, rd, 1, MO_64);
@@ -6731,7 +6738,8 @@ static void handle_vec_simd_sqshrn(DisasContext *s, b=
ool is_scalar, bool is_q,
     tcg_temp_free_i64(tcg_rd);
     tcg_temp_free_i32(tcg_rd_narrowed);
     tcg_temp_free_i64(tcg_final);
-    return;
+
+    clear_vec_high(s, is_q, rd);
 }
=20
 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
@@ -6795,10 +6803,7 @@ static void handle_simd_qshl(DisasContext *s, bool s=
calar, bool is_q,
             tcg_temp_free_i64(tcg_op);
         }
         tcg_temp_free_i64(tcg_shift);
-
-        if (!is_q) {
-            clear_vec_high(s, rd);
-        }
+        clear_vec_high(s, is_q, rd);
     } else {
         TCGv_i32 tcg_shift =3D tcg_const_i32(shift);
         static NeonGenTwoOpEnvFn * const fns[2][2][3] =3D {
@@ -6847,8 +6852,8 @@ static void handle_simd_qshl(DisasContext *s, bool sc=
alar, bool is_q,
         }
         tcg_temp_free_i32(tcg_shift);
=20
-        if (!is_q && !scalar) {
-            clear_vec_high(s, rd);
+        if (!scalar) {
+            clear_vec_high(s, is_q, rd);
         }
     }
 }
@@ -6901,13 +6906,11 @@ static void handle_simd_intfp_conv(DisasContext *s,=
 int rd, int rn,
         }
     }
=20
-    if (!is_double && elements =3D=3D 2) {
-        clear_vec_high(s, rd);
-    }
-
     tcg_temp_free_i64(tcg_int);
     tcg_temp_free_ptr(tcg_fpst);
     tcg_temp_free_i32(tcg_shift);
+
+    clear_vec_high(s, elements << size =3D=3D 16, rd);
 }
=20
 /* UCVTF/SCVTF - Integer to FP conversion */
@@ -6995,9 +6998,7 @@ static void handle_simd_shift_fpint_conv(DisasContext=
 *s, bool is_scalar,
             write_vec_element(s, tcg_op, rd, pass, MO_64);
             tcg_temp_free_i64(tcg_op);
         }
-        if (!is_q) {
-            clear_vec_high(s, rd);
-        }
+        clear_vec_high(s, is_q, rd);
     } else {
         int maxpass =3D is_scalar ? 1 : is_q ? 4 : 2;
         for (pass =3D 0; pass < maxpass; pass++) {
@@ -7016,8 +7017,8 @@ static void handle_simd_shift_fpint_conv(DisasContext=
 *s, bool is_scalar,
             }
             tcg_temp_free_i32(tcg_op);
         }
-        if (!is_q && !is_scalar) {
-            clear_vec_high(s, rd);
+        if (!is_scalar) {
+            clear_vec_high(s, is_q, rd);
         }
     }
=20
@@ -7502,10 +7503,7 @@ static void handle_3same_float(DisasContext *s, int =
size, int elements,
=20
     tcg_temp_free_ptr(fpst);
=20
-    if ((elements << size) < 4) {
-        /* scalar, or non-quad vector op */
-        clear_vec_high(s, rd);
-    }
+    clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
 }
=20
 /* AdvSIMD scalar three same
@@ -7831,13 +7829,11 @@ static void handle_2misc_fcmp_zero(DisasContext *s,=
 int opcode,
             }
             write_vec_element(s, tcg_res, rd, pass, MO_64);
         }
-        if (is_scalar) {
-            clear_vec_high(s, rd);
-        }
-
         tcg_temp_free_i64(tcg_res);
         tcg_temp_free_i64(tcg_zero);
         tcg_temp_free_i64(tcg_op);
+
+        clear_vec_high(s, !is_scalar, rd);
     } else {
         TCGv_i32 tcg_op =3D tcg_temp_new_i32();
         TCGv_i32 tcg_zero =3D tcg_const_i32(0);
@@ -7888,8 +7884,8 @@ static void handle_2misc_fcmp_zero(DisasContext *s, i=
nt opcode,
         tcg_temp_free_i32(tcg_res);
         tcg_temp_free_i32(tcg_zero);
         tcg_temp_free_i32(tcg_op);
-        if (!is_q && !is_scalar) {
-            clear_vec_high(s, rd);
+        if (!is_scalar) {
+            clear_vec_high(s, is_q, rd);
         }
     }
=20
@@ -7925,12 +7921,9 @@ static void handle_2misc_reciprocal(DisasContext *s,=
 int opcode,
             }
             write_vec_element(s, tcg_res, rd, pass, MO_64);
         }
-        if (is_scalar) {
-            clear_vec_high(s, rd);
-        }
-
         tcg_temp_free_i64(tcg_res);
         tcg_temp_free_i64(tcg_op);
+        clear_vec_high(s, !is_scalar, rd);
     } else {
         TCGv_i32 tcg_op =3D tcg_temp_new_i32();
         TCGv_i32 tcg_res =3D tcg_temp_new_i32();
@@ -7970,8 +7963,8 @@ static void handle_2misc_reciprocal(DisasContext *s, =
int opcode,
         }
         tcg_temp_free_i32(tcg_res);
         tcg_temp_free_i32(tcg_op);
-        if (!is_q && !is_scalar) {
-            clear_vec_high(s, rd);
+        if (!is_scalar) {
+            clear_vec_high(s, is_q, rd);
         }
     }
     tcg_temp_free_ptr(fpst);
@@ -8077,9 +8070,7 @@ static void handle_2misc_narrow(DisasContext *s, bool=
 scalar,
         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
         tcg_temp_free_i32(tcg_res[pass]);
     }
-    if (!is_q) {
-        clear_vec_high(s, rd);
-    }
+    clear_vec_high(s, is_q, rd);
 }
=20
 /* Remaining saturating accumulating ops */
@@ -8104,12 +8095,9 @@ static void handle_2misc_satacc(DisasContext *s, boo=
l is_scalar, bool is_u,
             }
             write_vec_element(s, tcg_rd, rd, pass, MO_64);
         }
-        if (is_scalar) {
-            clear_vec_high(s, rd);
-        }
-
         tcg_temp_free_i64(tcg_rd);
         tcg_temp_free_i64(tcg_rn);
+        clear_vec_high(s, !is_scalar, rd);
     } else {
         TCGv_i32 tcg_rn =3D tcg_temp_new_i32();
         TCGv_i32 tcg_rd =3D tcg_temp_new_i32();
@@ -8167,13 +8155,9 @@ static void handle_2misc_satacc(DisasContext *s, boo=
l is_scalar, bool is_u,
             }
             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
         }
-
-        if (!is_q) {
-            clear_vec_high(s, rd);
-        }
-
         tcg_temp_free_i32(tcg_rd);
         tcg_temp_free_i32(tcg_rn);
+        clear_vec_high(s, is_q, rd);
     }
 }
=20
@@ -8664,9 +8648,7 @@ static void handle_vec_simd_shri(DisasContext *s, boo=
l is_q, bool is_u,
     tcg_temp_free_i64(tcg_round);
=20
  done:
-    if (!is_q) {
-        clear_vec_high(s, rd);
-    }
+    clear_vec_high(s, is_q, rd);
 }
=20
 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
@@ -8855,19 +8837,18 @@ static void handle_vec_simd_shrn(DisasContext *s, b=
ool is_q,
     }
=20
     if (!is_q) {
-        clear_vec_high(s, rd);
         write_vec_element(s, tcg_final, rd, 0, MO_64);
     } else {
         write_vec_element(s, tcg_final, rd, 1, MO_64);
     }
-
     if (round) {
         tcg_temp_free_i64(tcg_round);
     }
     tcg_temp_free_i64(tcg_rn);
     tcg_temp_free_i64(tcg_rd);
     tcg_temp_free_i64(tcg_final);
-    return;
+
+    clear_vec_high(s, is_q, rd);
 }
=20
=20
@@ -9261,9 +9242,7 @@ static void handle_3rd_narrowing(DisasContext *s, int=
 is_q, int is_u, int size,
         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
         tcg_temp_free_i32(tcg_res[pass]);
     }
-    if (!is_q) {
-        clear_vec_high(s, rd);
-    }
+    clear_vec_high(s, is_q, rd);
 }
=20
 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int=
 rm)
@@ -9671,9 +9650,7 @@ static void handle_simd_3same_pair(DisasContext *s, i=
nt is_q, int u, int opcode,
             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
             tcg_temp_free_i32(tcg_res[pass]);
         }
-        if (!is_q) {
-            clear_vec_high(s, rd);
-        }
+        clear_vec_high(s, is_q, rd);
     }
=20
     if (fpst) {
@@ -10161,10 +10138,7 @@ static void disas_simd_3same_int(DisasContext *s, =
uint32_t insn)
             tcg_temp_free_i32(tcg_op2);
         }
     }
-
-    if (!is_q) {
-        clear_vec_high(s, rd);
-    }
+    clear_vec_high(s, is_q, rd);
 }
=20
 /* AdvSIMD three same
@@ -10303,9 +10277,7 @@ static void handle_rev(DisasContext *s, int opcode,=
 bool u,
             write_vec_element(s, tcg_tmp, rd, i, grp_size);
             tcg_temp_free_i64(tcg_tmp);
         }
-        if (!is_q) {
-            clear_vec_high(s, rd);
-        }
+        clear_vec_high(s, is_q, rd);
     } else {
         int revmask =3D (1 << grp_size) - 1;
         int esize =3D 8 << size;
@@ -10949,9 +10921,7 @@ static void disas_simd_two_reg_misc(DisasContext *s=
, uint32_t insn)
             tcg_temp_free_i32(tcg_op);
         }
     }
-    if (!is_q) {
-        clear_vec_high(s, rd);
-    }
+    clear_vec_high(s, is_q, rd);
=20
     if (need_rmode) {
         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
@@ -11130,11 +11100,8 @@ static void disas_simd_indexed(DisasContext *s, ui=
nt32_t insn)
             tcg_temp_free_i64(tcg_res);
         }
=20
-        if (is_scalar) {
-            clear_vec_high(s, rd);
-        }
-
         tcg_temp_free_i64(tcg_idx);
+        clear_vec_high(s, !is_scalar, rd);
     } else if (!is_long) {
         /* 32 bit floating point, or 16 or 32 bit integer.
          * For the 16 bit scalar case we use the usual Neon helpers and
@@ -11238,10 +11205,7 @@ static void disas_simd_indexed(DisasContext *s, ui=
nt32_t insn)
         }
=20
         tcg_temp_free_i32(tcg_idx);
-
-        if (!is_q) {
-            clear_vec_high(s, rd);
-        }
+        clear_vec_high(s, is_q, rd);
     } else {
         /* long ops: 16x16->32 or 32x32->64 */
         TCGv_i64 tcg_res[2];
@@ -11318,9 +11282,7 @@ static void disas_simd_indexed(DisasContext *s, uin=
t32_t insn)
             }
             tcg_temp_free_i64(tcg_idx);
=20
-            if (is_scalar) {
-                clear_vec_high(s, rd);
-            }
+            clear_vec_high(s, !is_scalar, rd);
         } else {
             TCGv_i32 tcg_idx =3D tcg_temp_new_i32();
=20
--=20
2.14.3