[PATCH v3] target/arm: Fix sve2 ldnt1 and stnt1

Richard Henderson posted 1 patch 2 years, 2 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20220308031655.240710-1-richard.henderson@linaro.org
Test checkpatch failed
Maintainers: Peter Maydell <peter.maydell@linaro.org>
target/arm/sve.decode             |  5 ++-
target/arm/translate-sve.c        | 51 +++++++++++++++++++++++++++++--
tests/tcg/aarch64/test-826.c      | 50 ++++++++++++++++++++++++++++++
tests/tcg/aarch64/Makefile.target |  4 +++
tests/tcg/configure.sh            |  4 +++
5 files changed, 109 insertions(+), 5 deletions(-)
create mode 100644 tests/tcg/aarch64/test-826.c
[PATCH v3] target/arm: Fix sve2 ldnt1 and stnt1
Posted by Richard Henderson 2 years, 2 months ago
For both ldnt1 and stnt1, the meaning of the Rn and Rm are different
from ld1 and st1: the vector and integer registers are reversed, and
the integer register 31 refers to XZR instead of SP.

Secondly, the 64-bit version of ldnt1 was being interpreted as
32-bit unpacked unscaled offset instead of 64-bit unscaled offset,
which discarded the upper 32 bits of the address coming from
the vector argument.

Thirdly, validate that the memory element size is in range for the
vector element size for ldnt1.  For ld1, we do this via independent
decode patterns, but for ldnt1 we need to do it manually.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/826
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/sve.decode             |  5 ++-
 target/arm/translate-sve.c        | 51 +++++++++++++++++++++++++++++--
 tests/tcg/aarch64/test-826.c      | 50 ++++++++++++++++++++++++++++++
 tests/tcg/aarch64/Makefile.target |  4 +++
 tests/tcg/configure.sh            |  4 +++
 5 files changed, 109 insertions(+), 5 deletions(-)
 create mode 100644 tests/tcg/aarch64/test-826.c

diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index c60b9f0fec..0388cce3bd 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1575,10 +1575,9 @@ USDOT_zzzz      01000100 .. 0 ..... 011 110 ..... .....  @rda_rn_rm
 
 ### SVE2 Memory Gather Load Group
 
-# SVE2 64-bit gather non-temporal load
-#   (scalar plus unpacked 32-bit unscaled offsets)
+# SVE2 64-bit gather non-temporal load (scalar plus 64-bit unscaled offsets)
 LDNT1_zprz      1100010 msz:2 00 rm:5 1 u:1 0 pg:3 rn:5 rd:5 \
-                &rprr_gather_load xs=0 esz=3 scale=0 ff=0
+                &rprr_gather_load xs=2 esz=3 scale=0 ff=0
 
 # SVE2 32-bit gather non-temporal load (scalar plus 32-bit unscaled offsets)
 LDNT1_zprz      1000010 msz:2 00 rm:5 10 u:1 pg:3 rn:5 rd:5 \
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 33ca1bcfac..2c23459e76 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -6487,10 +6487,33 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
 
 static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
 {
+    gen_helper_gvec_mem_scatter *fn = NULL;
+    bool be = s->be_data == MO_BE;
+    bool mte = s->mte_active[0];
+
+    if (a->esz < a->msz + !a->u) {
+        return false;
+    }
     if (!dc_isar_feature(aa64_sve2, s)) {
         return false;
     }
-    return trans_LD1_zprz(s, a);
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    switch (a->esz) {
+    case MO_32:
+        fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
+        break;
+    case MO_64:
+        fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
+        break;
+    }
+    assert(fn != NULL);
+
+    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
+               cpu_reg(s, a->rm), a->msz, false, fn);
+    return true;
 }
 
 /* Indexed by [mte][be][xs][msz].  */
@@ -6647,10 +6670,34 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
 
 static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
 {
+    gen_helper_gvec_mem_scatter *fn;
+    bool be = s->be_data == MO_BE;
+    bool mte = s->mte_active[0];
+
+    if (a->esz < a->msz) {
+        return false;
+    }
     if (!dc_isar_feature(aa64_sve2, s)) {
         return false;
     }
-    return trans_ST1_zprz(s, a);
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    switch (a->esz) {
+    case MO_32:
+        fn = scatter_store_fn32[mte][be][0][a->msz];
+        break;
+    case MO_64:
+        fn = scatter_store_fn64[mte][be][2][a->msz];
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
+               cpu_reg(s, a->rm), a->msz, true, fn);
+    return true;
 }
 
 /*
diff --git a/tests/tcg/aarch64/test-826.c b/tests/tcg/aarch64/test-826.c
new file mode 100644
index 0000000000..f59740a8c5
--- /dev/null
+++ b/tests/tcg/aarch64/test-826.c
@@ -0,0 +1,50 @@
+#include <sys/mman.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+
+static void *expected;
+
+void sigsegv(int sig, siginfo_t *info, void *vuc)
+{
+    ucontext_t *uc = vuc;
+
+    assert(info->si_addr == expected);
+    uc->uc_mcontext.pc += 4;
+}
+
+int main()
+{
+    struct sigaction sa = {
+        .sa_sigaction = sigsegv,
+        .sa_flags = SA_SIGINFO
+    };
+
+    void *page;
+    long ofs;
+
+    if (sigaction(SIGSEGV, &sa, NULL) < 0) {
+        perror("sigaction");
+        return EXIT_FAILURE;
+    }
+
+    page = mmap(0, getpagesize(), PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
+    if (page == MAP_FAILED) {
+        perror("mmap");
+        return EXIT_FAILURE;
+    }
+
+    ofs = 0x124;
+    expected = page + ofs;
+
+    asm("ptrue p0.d, vl1\n\t"
+        "dup z0.d, %0\n\t"
+        "ldnt1h {z1.d}, p0/z, [z0.d, %1]\n\t"
+        "dup z1.d, %1\n\t"
+        "ldnt1h {z0.d}, p0/z, [z1.d, %0]"
+        : : "r"(page), "r"(ofs) : "v0", "v1");
+
+    return EXIT_SUCCESS;
+}
diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target
index ac07acde66..f7121cb4d8 100644
--- a/tests/tcg/aarch64/Makefile.target
+++ b/tests/tcg/aarch64/Makefile.target
@@ -86,7 +86,11 @@ run-gdbstub-sve-ioctls: sve-ioctls
 
 EXTRA_RUNS += run-gdbstub-sysregs run-gdbstub-sve-ioctls
 endif
+endif
 
+ifneq ($(DOCKER_IMAGE)$(CROSS_CC_HAS_SVE2),)
+AARCH64_TESTS += test-826
+test-826: CFLAGS+=-march=armv8.1-a+sve2
 endif
 
 TESTS += $(AARCH64_TESTS)
diff --git a/tests/tcg/configure.sh b/tests/tcg/configure.sh
index ed4b5ccb1f..84f928f7f8 100755
--- a/tests/tcg/configure.sh
+++ b/tests/tcg/configure.sh
@@ -299,6 +299,10 @@ for target in $target_list; do
                              -march=armv8.1-a+sve -o $TMPE $TMPC; then
                   echo "CROSS_CC_HAS_SVE=y" >> $config_target_mak
               fi
+              if do_compiler "$target_compiler" $target_compiler_cflags \
+                             -march=armv8.1-a+sve2 -o $TMPE $TMPC; then
+                  echo "CROSS_CC_HAS_SVE2=y" >> $config_target_mak
+              fi
               if do_compiler "$target_compiler" $target_compiler_cflags \
                              -march=armv8.3-a -o $TMPE $TMPC; then
                   echo "CROSS_CC_HAS_ARMV8_3=y" >> $config_target_mak
-- 
2.25.1
Re: [PATCH v3] target/arm: Fix sve2 ldnt1 and stnt1
Posted by Peter Maydell 2 years, 1 month ago
On Tue, 8 Mar 2022 at 03:18, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> For both ldnt1 and stnt1, the meaning of the Rn and Rm are different
> from ld1 and st1: the vector and integer registers are reversed, and
> the integer register 31 refers to XZR instead of SP.
>
> Secondly, the 64-bit version of ldnt1 was being interpreted as
> 32-bit unpacked unscaled offset instead of 64-bit unscaled offset,
> which discarded the upper 32 bits of the address coming from
> the vector argument.
>
> Thirdly, validate that the memory element size is in range for the
> vector element size for ldnt1.  For ld1, we do this via independent
> decode patterns, but for ldnt1 we need to do it manually.
>
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/826
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/sve.decode             |  5 ++-
>  target/arm/translate-sve.c        | 51 +++++++++++++++++++++++++++++--
>  tests/tcg/aarch64/test-826.c      | 50 ++++++++++++++++++++++++++++++
>  tests/tcg/aarch64/Makefile.target |  4 +++
>  tests/tcg/configure.sh            |  4 +++
>  5 files changed, 109 insertions(+), 5 deletions(-)
>  create mode 100644 tests/tcg/aarch64/test-826.c
>
> diff --git a/target/arm/sve.decode b/target/arm/sve.decode
> index c60b9f0fec..0388cce3bd 100644
> --- a/target/arm/sve.decode
> +++ b/target/arm/sve.decode
> @@ -1575,10 +1575,9 @@ USDOT_zzzz      01000100 .. 0 ..... 011 110 ..... .....  @rda_rn_rm
>
>  ### SVE2 Memory Gather Load Group
>
> -# SVE2 64-bit gather non-temporal load
> -#   (scalar plus unpacked 32-bit unscaled offsets)
> +# SVE2 64-bit gather non-temporal load (scalar plus 64-bit unscaled offsets)
>  LDNT1_zprz      1100010 msz:2 00 rm:5 1 u:1 0 pg:3 rn:5 rd:5 \
> -                &rprr_gather_load xs=0 esz=3 scale=0 ff=0
> +                &rprr_gather_load xs=2 esz=3 scale=0 ff=0

We correct the xs= value here...

>
>  # SVE2 32-bit gather non-temporal load (scalar plus 32-bit unscaled offsets)
>  LDNT1_zprz      1000010 msz:2 00 rm:5 10 u:1 pg:3 rn:5 rd:5 \
> diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
> index 33ca1bcfac..2c23459e76 100644
> --- a/target/arm/translate-sve.c
> +++ b/target/arm/translate-sve.c
> @@ -6487,10 +6487,33 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
>
>  static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
>  {
> +    gen_helper_gvec_mem_scatter *fn = NULL;
> +    bool be = s->be_data == MO_BE;
> +    bool mte = s->mte_active[0];
> +
> +    if (a->esz < a->msz + !a->u) {
> +        return false;
> +    }
>      if (!dc_isar_feature(aa64_sve2, s)) {
>          return false;
>      }
> -    return trans_LD1_zprz(s, a);
> +    if (!sve_access_check(s)) {
> +        return true;
> +    }
> +
> +    switch (a->esz) {
> +    case MO_32:
> +        fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
> +        break;
> +    case MO_64:
> +        fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
> +        break;
> +    }

...but then instead of using it we hard code use of 0 or 2 here,
which makes the change above a bit moot.

> +    assert(fn != NULL);
> +
> +    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
> +               cpu_reg(s, a->rm), a->msz, false, fn);
> +    return true;
>  }

Anyway
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM
Re: [PATCH v3] target/arm: Fix sve2 ldnt1 and stnt1
Posted by Peter Maydell 2 years, 1 month ago
On Tue, 8 Mar 2022 at 11:56, Peter Maydell <peter.maydell@linaro.org> wrote:
>
> Anyway
> Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

...and applied to target-arm.next.

-- PMM
Re: [PATCH v3] target/arm: Fix sve2 ldnt1 and stnt1
Posted by Richard Henderson 2 years, 1 month ago
On 3/8/22 01:56, Peter Maydell wrote:
>> -# SVE2 64-bit gather non-temporal load
>> -#   (scalar plus unpacked 32-bit unscaled offsets)
>> +# SVE2 64-bit gather non-temporal load (scalar plus 64-bit unscaled offsets)
>>   LDNT1_zprz      1100010 msz:2 00 rm:5 1 u:1 0 pg:3 rn:5 rd:5 \
>> -                &rprr_gather_load xs=0 esz=3 scale=0 ff=0
>> +                &rprr_gather_load xs=2 esz=3 scale=0 ff=0
> 
> We correct the xs= value here...
...
>> +    switch (a->esz) {
>> +    case MO_32:
>> +        fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
>> +        break;
>> +    case MO_64:
>> +        fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
>> +        break;
>> +    }
> 
> ...but then instead of using it we hard code use of 0 or 2 here,
> which makes the change above a bit moot.

Yeah, sorry for that.  I was in the middle of reducing the argument set collected by those 
decode patterns, then decided there was little advantage, and left something in the middle.



r~