[v1] target/s390x: Leverage 128-bit vector type in VLL / VSTL opcodes

[PATCH] target/s390x: Leverage 128-bit vector type in VLL / VSTL opcodes

Posted by Philippe Mathieu-Daudé 1 month, 1 week ago

The access is unaligned, we can easily replace the 2
consecutive 64-bit calls by a single 128-bit one.

Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
---
Based-on: <20260423135035.50126-1-philmd@linaro.org>
---
 target/s390x/tcg/vec.h        | 12 ++++++++++++
 target/s390x/tcg/vec_helper.c | 23 ++++++++++-------------
 2 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/target/s390x/tcg/vec.h b/target/s390x/tcg/vec.h
index 8d095efcfc6..34413f7e029 100644
--- a/target/s390x/tcg/vec.h
+++ b/target/s390x/tcg/vec.h
@@ -13,8 +13,10 @@
 #define S390X_VEC_H
 
 #include "tcg/tcg.h"
+#include "qemu/int128.h"
 
 typedef union S390Vector {
+    Int128 quadword;
     uint64_t doubleword[2];
     uint32_t word[4];
     uint16_t halfword[8];
@@ -72,6 +74,11 @@ static inline uint64_t s390_vec_read_element64(const S390Vector *v, uint8_t enr)
     return v->doubleword[enr];
 }
 
+static inline Int128 s390_vec_read_element128(const S390Vector *v)
+{
+    return v->quadword;
+}
+
 static inline uint64_t s390_vec_read_element(const S390Vector *v, uint8_t enr,
                                              uint8_t es)
 {
@@ -117,6 +124,11 @@ static inline void s390_vec_write_element64(S390Vector *v, uint8_t enr,
     v->doubleword[enr] = data;
 }
 
+static inline void s390_vec_write_element128(S390Vector *v, Int128 data)
+{
+    v->quadword = data;
+}
+
 static inline void s390_vec_write_element(S390Vector *v, uint8_t enr,
                                           uint8_t es, uint64_t data)
 {
diff --git a/target/s390x/tcg/vec_helper.c b/target/s390x/tcg/vec_helper.c
index 98eecd9fde6..4d5529e1781 100644
--- a/target/s390x/tcg/vec_helper.c
+++ b/target/s390x/tcg/vec_helper.c
@@ -43,19 +43,17 @@ void HELPER(gvec_vbperm)(void *v1, const void *v2, const void *v3,
 
 void HELPER(vll)(CPUS390XState *env, void *v1, uint64_t addr, uint64_t bytes)
 {
-    const int mmu_idx = cpu_mmu_index(env_cpu(env), false);
+    int mmu_idx = cpu_mmu_index(env_cpu(env), false);
     const uintptr_t ra = GETPC();
     MemOpIdx oi;
 
     if (likely(bytes >= 16)) {
-        uint64_t t0, t1;
+        Int128 t;
 
-        oi = make_memop_idx(MO_BE | MO_64 | MO_UNALN, mmu_idx);
-        t0 = cpu_ldq_mmu(env, addr, oi, ra);
-        addr = wrap_address(env, addr + 8);
-        t1 = cpu_ldq_mmu(env, addr, oi, ra);
-        s390_vec_write_element64(v1, 0, t0);
-        s390_vec_write_element64(v1, 1, t1);
+        oi = make_memop_idx(MO_BE | MO_128 | MO_UNALN, mmu_idx);
+        addr = wrap_address(env, addr + 16);
+        t = cpu_ld16_mmu(env, addr, oi, ra);
+        s390_vec_write_element128(v1, t);
     } else {
         S390Vector tmp = {};
 
@@ -198,7 +196,7 @@ void HELPER(gvec_vperm)(void *v1, const void *v2, const void *v3,
 void HELPER(vstl)(CPUS390XState *env, const void *v1, uint64_t addr,
                   uint64_t bytes)
 {
-    const int mmu_idx = cpu_mmu_index(env_cpu(env), false);
+    int mmu_idx = cpu_mmu_index(env_cpu(env), false);
     const uintptr_t ra = GETPC();
     MemOpIdx oi;
 
@@ -206,10 +204,9 @@ void HELPER(vstl)(CPUS390XState *env, const void *v1, uint64_t addr,
     probe_write_access(env, addr, MIN(bytes, 16), GETPC());
 
     if (likely(bytes >= 16)) {
-        oi = make_memop_idx(MO_BE | MO_64 | MO_UNALN, mmu_idx);
-        cpu_stq_mmu(env, addr, s390_vec_read_element64(v1, 0), oi, ra);
-        addr = wrap_address(env, addr + 8);
-        cpu_stq_mmu(env, addr, s390_vec_read_element64(v1, 1), oi, ra);
+        oi = make_memop_idx(MO_BE | MO_128 | MO_UNALN, mmu_idx);
+        addr = wrap_address(env, addr + 16);
+        cpu_st16_mmu(env, addr, s390_vec_read_element128(v1), oi, ra);
     } else {
         oi = make_memop_idx(MO_8, mmu_idx);
 
-- 
2.53.0

Re: [PATCH] target/s390x: Leverage 128-bit vector type in VLL / VSTL opcodes

Posted by Richard Henderson 1 month ago

On 4/24/26 00:12, Philippe Mathieu-Daudé wrote:
> @@ -43,19 +43,17 @@ void HELPER(gvec_vbperm)(void *v1, const void *v2, const void *v3,
>   
>   void HELPER(vll)(CPUS390XState *env, void *v1, uint64_t addr, uint64_t bytes)
>   {
> -    const int mmu_idx = cpu_mmu_index(env_cpu(env), false);
> +    int mmu_idx = cpu_mmu_index(env_cpu(env), false);
>       const uintptr_t ra = GETPC();
>       MemOpIdx oi;
>   
>       if (likely(bytes >= 16)) {
> -        uint64_t t0, t1;
> +        Int128 t;
>   
> -        oi = make_memop_idx(MO_BE | MO_64 | MO_UNALN, mmu_idx);
> -        t0 = cpu_ldq_mmu(env, addr, oi, ra);
> -        addr = wrap_address(env, addr + 8);
> -        t1 = cpu_ldq_mmu(env, addr, oi, ra);
> -        s390_vec_write_element64(v1, 0, t0);
> -        s390_vec_write_element64(v1, 1, t1);
> +        oi = make_memop_idx(MO_BE | MO_128 | MO_UNALN, mmu_idx);
> +        addr = wrap_address(env, addr + 16);
> +        t = cpu_ld16_mmu(env, addr, oi, ra);
> +        s390_vec_write_element128(v1, t);

The most important thing to consider when converting to 128-bit access is the atomicity, 
otherwise you can find yourself slowing down emulation when the host does not support 
atomic 128-bit access.

In this case, VLL (Vector Load with Length) loads n bytes, as can be seen with the 
fallback code just below.  So MO_ATOM_NONE is appropriate.  Similarly VSTL (Vector STore 
with Length).



r~

Re: [PATCH] target/s390x: Leverage 128-bit vector type in VLL / VSTL opcodes

Posted by Philippe Mathieu-Daudé 1 month, 1 week ago

On 23/4/26 16:12, Philippe Mathieu-Daudé wrote:
> The access is unaligned, we can easily replace the 2
> consecutive 64-bit calls by a single 128-bit one.
> 
> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> ---
> Based-on: <20260423135035.50126-1-philmd@linaro.org>
> ---
>   target/s390x/tcg/vec.h        | 12 ++++++++++++
>   target/s390x/tcg/vec_helper.c | 23 ++++++++++-------------
>   2 files changed, 22 insertions(+), 13 deletions(-)


> @@ -198,7 +196,7 @@ void HELPER(gvec_vperm)(void *v1, const void *v2, const void *v3,
>   void HELPER(vstl)(CPUS390XState *env, const void *v1, uint64_t addr,
>                     uint64_t bytes)
>   {
> -    const int mmu_idx = cpu_mmu_index(env_cpu(env), false);
> +    int mmu_idx = cpu_mmu_index(env_cpu(env), false);

Oops, mis-rebase.