target/s390x/tcg/vec.h | 12 ++++++++++++ target/s390x/tcg/vec_helper.c | 17 ++++++----------- 2 files changed, 18 insertions(+), 11 deletions(-)
The access is unaligned (MO_UNALN), we can easily replace the 2
consecutive 64-bit calls by a single 128-bit one.
Considering atomicity of the new 128-bit access, the both opcodes
VLL (Vector Load with Length) and VSTL (Vector STore with Length)
access byte granularity, so use MO_ATOM_NONE.
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
---
Based-on: <20260423135035.50126-1-philmd@linaro.org>
v3:
- do not increase addr (Ilya)
- MO_ATOM_NONE (Richard)
---
target/s390x/tcg/vec.h | 12 ++++++++++++
target/s390x/tcg/vec_helper.c | 17 ++++++-----------
2 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/target/s390x/tcg/vec.h b/target/s390x/tcg/vec.h
index 8d095efcfc6..34413f7e029 100644
--- a/target/s390x/tcg/vec.h
+++ b/target/s390x/tcg/vec.h
@@ -13,8 +13,10 @@
#define S390X_VEC_H
#include "tcg/tcg.h"
+#include "qemu/int128.h"
typedef union S390Vector {
+ Int128 quadword;
uint64_t doubleword[2];
uint32_t word[4];
uint16_t halfword[8];
@@ -72,6 +74,11 @@ static inline uint64_t s390_vec_read_element64(const S390Vector *v, uint8_t enr)
return v->doubleword[enr];
}
+static inline Int128 s390_vec_read_element128(const S390Vector *v)
+{
+ return v->quadword;
+}
+
static inline uint64_t s390_vec_read_element(const S390Vector *v, uint8_t enr,
uint8_t es)
{
@@ -117,6 +124,11 @@ static inline void s390_vec_write_element64(S390Vector *v, uint8_t enr,
v->doubleword[enr] = data;
}
+static inline void s390_vec_write_element128(S390Vector *v, Int128 data)
+{
+ v->quadword = data;
+}
+
static inline void s390_vec_write_element(S390Vector *v, uint8_t enr,
uint8_t es, uint64_t data)
{
diff --git a/target/s390x/tcg/vec_helper.c b/target/s390x/tcg/vec_helper.c
index 98eecd9fde6..a86819f61b6 100644
--- a/target/s390x/tcg/vec_helper.c
+++ b/target/s390x/tcg/vec_helper.c
@@ -48,14 +48,11 @@ void HELPER(vll)(CPUS390XState *env, void *v1, uint64_t addr, uint64_t bytes)
MemOpIdx oi;
if (likely(bytes >= 16)) {
- uint64_t t0, t1;
+ Int128 t;
- oi = make_memop_idx(MO_BE | MO_64 | MO_UNALN, mmu_idx);
- t0 = cpu_ldq_mmu(env, addr, oi, ra);
- addr = wrap_address(env, addr + 8);
- t1 = cpu_ldq_mmu(env, addr, oi, ra);
- s390_vec_write_element64(v1, 0, t0);
- s390_vec_write_element64(v1, 1, t1);
+ oi = make_memop_idx(MO_128 | MO_UNALN | MO_ATOM_NONE | MO_BE, mmu_idx);
+ t = cpu_ld16_mmu(env, addr, oi, ra);
+ s390_vec_write_element128(v1, t);
} else {
S390Vector tmp = {};
@@ -206,10 +203,8 @@ void HELPER(vstl)(CPUS390XState *env, const void *v1, uint64_t addr,
probe_write_access(env, addr, MIN(bytes, 16), GETPC());
if (likely(bytes >= 16)) {
- oi = make_memop_idx(MO_BE | MO_64 | MO_UNALN, mmu_idx);
- cpu_stq_mmu(env, addr, s390_vec_read_element64(v1, 0), oi, ra);
- addr = wrap_address(env, addr + 8);
- cpu_stq_mmu(env, addr, s390_vec_read_element64(v1, 1), oi, ra);
+ oi = make_memop_idx(MO_128 | MO_UNALN | MO_ATOM_NONE | MO_BE, mmu_idx);
+ cpu_st16_mmu(env, addr, s390_vec_read_element128(v1), oi, ra);
} else {
oi = make_memop_idx(MO_8, mmu_idx);
--
2.53.0
On 4/24/26 11:43, Philippe Mathieu-Daudé wrote:
> The access is unaligned (MO_UNALN), we can easily replace the 2
> consecutive 64-bit calls by a single 128-bit one.
> Considering atomicity of the new 128-bit access, the both opcodes
> VLL (Vector Load with Length) and VSTL (Vector STore with Length)
> access byte granularity, so use MO_ATOM_NONE.
>
> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> ---
> Based-on: <20260423135035.50126-1-philmd@linaro.org>
>
> v3:
> - do not increase addr (Ilya)
> - MO_ATOM_NONE (Richard)
> ---
> target/s390x/tcg/vec.h | 12 ++++++++++++
> target/s390x/tcg/vec_helper.c | 17 ++++++-----------
> 2 files changed, 18 insertions(+), 11 deletions(-)
>
> diff --git a/target/s390x/tcg/vec.h b/target/s390x/tcg/vec.h
> index 8d095efcfc6..34413f7e029 100644
> --- a/target/s390x/tcg/vec.h
> +++ b/target/s390x/tcg/vec.h
> @@ -13,8 +13,10 @@
> #define S390X_VEC_H
>
> #include "tcg/tcg.h"
> +#include "qemu/int128.h"
>
> typedef union S390Vector {
> + Int128 quadword;
> uint64_t doubleword[2];
> uint32_t word[4];
> uint16_t halfword[8];
> @@ -72,6 +74,11 @@ static inline uint64_t s390_vec_read_element64(const S390Vector *v, uint8_t enr)
> return v->doubleword[enr];
> }
>
> +static inline Int128 s390_vec_read_element128(const S390Vector *v)
> +{
> + return v->quadword;
> +}
> +
> static inline uint64_t s390_vec_read_element(const S390Vector *v, uint8_t enr,
> uint8_t es)
> {
> @@ -117,6 +124,11 @@ static inline void s390_vec_write_element64(S390Vector *v, uint8_t enr,
> v->doubleword[enr] = data;
> }
>
> +static inline void s390_vec_write_element128(S390Vector *v, Int128 data)
> +{
> + v->quadword = data;
> +}
> +
> static inline void s390_vec_write_element(S390Vector *v, uint8_t enr,
> uint8_t es, uint64_t data)
> {
> diff --git a/target/s390x/tcg/vec_helper.c b/target/s390x/tcg/vec_helper.c
> index 98eecd9fde6..a86819f61b6 100644
> --- a/target/s390x/tcg/vec_helper.c
> +++ b/target/s390x/tcg/vec_helper.c
> @@ -48,14 +48,11 @@ void HELPER(vll)(CPUS390XState *env, void *v1, uint64_t addr, uint64_t bytes)
> MemOpIdx oi;
>
> if (likely(bytes >= 16)) {
> - uint64_t t0, t1;
> + Int128 t;
>
> - oi = make_memop_idx(MO_BE | MO_64 | MO_UNALN, mmu_idx);
> - t0 = cpu_ldq_mmu(env, addr, oi, ra);
> - addr = wrap_address(env, addr + 8);
> - t1 = cpu_ldq_mmu(env, addr, oi, ra);
> - s390_vec_write_element64(v1, 0, t0);
> - s390_vec_write_element64(v1, 1, t1);
> + oi = make_memop_idx(MO_128 | MO_UNALN | MO_ATOM_NONE | MO_BE, mmu_idx);
> + t = cpu_ld16_mmu(env, addr, oi, ra);
> + s390_vec_write_element128(v1, t);
> } else {
> S390Vector tmp = {};
>
> @@ -206,10 +203,8 @@ void HELPER(vstl)(CPUS390XState *env, const void *v1, uint64_t addr,
> probe_write_access(env, addr, MIN(bytes, 16), GETPC());
>
> if (likely(bytes >= 16)) {
> - oi = make_memop_idx(MO_BE | MO_64 | MO_UNALN, mmu_idx);
> - cpu_stq_mmu(env, addr, s390_vec_read_element64(v1, 0), oi, ra);
> - addr = wrap_address(env, addr + 8);
> - cpu_stq_mmu(env, addr, s390_vec_read_element64(v1, 1), oi, ra);
> + oi = make_memop_idx(MO_128 | MO_UNALN | MO_ATOM_NONE | MO_BE, mmu_idx);
> + cpu_st16_mmu(env, addr, s390_vec_read_element128(v1), oi, ra);
> } else {
> oi = make_memop_idx(MO_8, mmu_idx);
>
In general LGTM, but I just tried this on top of
20260423135035.50126-1-philmd@linaro.org and I'm consistently getting:
make[2]: *** [Makefile:236: run-precise-smc-softmmu] Error 1
The test uses VSTL, so it's definitely related.
On 25/4/26 00:05, Ilya Leoshkevich wrote:
>
> On 4/24/26 11:43, Philippe Mathieu-Daudé wrote:
>> The access is unaligned (MO_UNALN), we can easily replace the 2
>> consecutive 64-bit calls by a single 128-bit one.
>> Considering atomicity of the new 128-bit access, the both opcodes
>> VLL (Vector Load with Length) and VSTL (Vector STore with Length)
>> access byte granularity, so use MO_ATOM_NONE.
>>
>> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
>> ---
>> Based-on: <20260423135035.50126-1-philmd@linaro.org>
>>
>> v3:
>> - do not increase addr (Ilya)
>> - MO_ATOM_NONE (Richard)
>> ---
>> target/s390x/tcg/vec.h | 12 ++++++++++++
>> target/s390x/tcg/vec_helper.c | 17 ++++++-----------
>> 2 files changed, 18 insertions(+), 11 deletions(-)
>>
>> diff --git a/target/s390x/tcg/vec.h b/target/s390x/tcg/vec.h
>> index 8d095efcfc6..34413f7e029 100644
>> --- a/target/s390x/tcg/vec.h
>> +++ b/target/s390x/tcg/vec.h
>> @@ -13,8 +13,10 @@
>> #define S390X_VEC_H
>> #include "tcg/tcg.h"
>> +#include "qemu/int128.h"
>> typedef union S390Vector {
>> + Int128 quadword;
>> uint64_t doubleword[2];
>> uint32_t word[4];
>> uint16_t halfword[8];
>> @@ -72,6 +74,11 @@ static inline uint64_t
>> s390_vec_read_element64(const S390Vector *v, uint8_t enr)
>> return v->doubleword[enr];
>> }
>> +static inline Int128 s390_vec_read_element128(const S390Vector *v)
>> +{
>> + return v->quadword;
>> +}
>> +
>> static inline uint64_t s390_vec_read_element(const S390Vector *v,
>> uint8_t enr,
>> uint8_t es)
>> {
>> @@ -117,6 +124,11 @@ static inline void
>> s390_vec_write_element64(S390Vector *v, uint8_t enr,
>> v->doubleword[enr] = data;
>> }
>> +static inline void s390_vec_write_element128(S390Vector *v, Int128 data)
>> +{
>> + v->quadword = data;
>> +}
>> +
>> static inline void s390_vec_write_element(S390Vector *v, uint8_t enr,
>> uint8_t es, uint64_t data)
>> {
>> diff --git a/target/s390x/tcg/vec_helper.c b/target/s390x/tcg/
>> vec_helper.c
>> index 98eecd9fde6..a86819f61b6 100644
>> --- a/target/s390x/tcg/vec_helper.c
>> +++ b/target/s390x/tcg/vec_helper.c
>> @@ -48,14 +48,11 @@ void HELPER(vll)(CPUS390XState *env, void *v1,
>> uint64_t addr, uint64_t bytes)
>> MemOpIdx oi;
>> if (likely(bytes >= 16)) {
>> - uint64_t t0, t1;
>> + Int128 t;
>> - oi = make_memop_idx(MO_BE | MO_64 | MO_UNALN, mmu_idx);
>> - t0 = cpu_ldq_mmu(env, addr, oi, ra);
>> - addr = wrap_address(env, addr + 8);
>> - t1 = cpu_ldq_mmu(env, addr, oi, ra);
>> - s390_vec_write_element64(v1, 0, t0);
>> - s390_vec_write_element64(v1, 1, t1);
>> + oi = make_memop_idx(MO_128 | MO_UNALN | MO_ATOM_NONE | MO_BE,
>> mmu_idx);
>> + t = cpu_ld16_mmu(env, addr, oi, ra);
>> + s390_vec_write_element128(v1, t);
>> } else {
>> S390Vector tmp = {};
>> @@ -206,10 +203,8 @@ void HELPER(vstl)(CPUS390XState *env, const void
>> *v1, uint64_t addr,
>> probe_write_access(env, addr, MIN(bytes, 16), GETPC());
>> if (likely(bytes >= 16)) {
>> - oi = make_memop_idx(MO_BE | MO_64 | MO_UNALN, mmu_idx);
>> - cpu_stq_mmu(env, addr, s390_vec_read_element64(v1, 0), oi, ra);
>> - addr = wrap_address(env, addr + 8);
>> - cpu_stq_mmu(env, addr, s390_vec_read_element64(v1, 1), oi, ra);
>> + oi = make_memop_idx(MO_128 | MO_UNALN | MO_ATOM_NONE | MO_BE,
>> mmu_idx);
>> + cpu_st16_mmu(env, addr, s390_vec_read_element128(v1), oi, ra);
>> } else {
>> oi = make_memop_idx(MO_8, mmu_idx);
>
>
> In general LGTM, but I just tried this on top of 20260423135035.50126-1-
> philmd@linaro.org and I'm consistently getting:
>
>
> make[2]: *** [Makefile:236: run-precise-smc-softmmu] Error 1
>
>
> The test uses VSTL, so it's definitely related.
Is this on a big-endian host? (Asking because this is the single
configuration I didn't tested)
On 4/25/26 11:09, Philippe Mathieu-Daudé wrote:
> On 25/4/26 00:05, Ilya Leoshkevich wrote:
>>
>> On 4/24/26 11:43, Philippe Mathieu-Daudé wrote:
>>> The access is unaligned (MO_UNALN), we can easily replace the 2
>>> consecutive 64-bit calls by a single 128-bit one.
>>> Considering atomicity of the new 128-bit access, the both opcodes
>>> VLL (Vector Load with Length) and VSTL (Vector STore with Length)
>>> access byte granularity, so use MO_ATOM_NONE.
>>>
>>> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
>>> ---
>>> Based-on: <20260423135035.50126-1-philmd@linaro.org>
>>>
>>> v3:
>>> - do not increase addr (Ilya)
>>> - MO_ATOM_NONE (Richard)
>>> ---
>>> target/s390x/tcg/vec.h | 12 ++++++++++++
>>> target/s390x/tcg/vec_helper.c | 17 ++++++-----------
>>> 2 files changed, 18 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/target/s390x/tcg/vec.h b/target/s390x/tcg/vec.h
>>> index 8d095efcfc6..34413f7e029 100644
>>> --- a/target/s390x/tcg/vec.h
>>> +++ b/target/s390x/tcg/vec.h
>>> @@ -13,8 +13,10 @@
>>> #define S390X_VEC_H
>>> #include "tcg/tcg.h"
>>> +#include "qemu/int128.h"
>>> typedef union S390Vector {
>>> + Int128 quadword;
>>> uint64_t doubleword[2];
>>> uint32_t word[4];
>>> uint16_t halfword[8];
>>> @@ -72,6 +74,11 @@ static inline uint64_t
>>> s390_vec_read_element64(const S390Vector *v, uint8_t enr)
>>> return v->doubleword[enr];
>>> }
>>> +static inline Int128 s390_vec_read_element128(const S390Vector *v)
>>> +{
>>> + return v->quadword;
>>> +}
>>> +
>>> static inline uint64_t s390_vec_read_element(const S390Vector *v,
>>> uint8_t enr,
>>> uint8_t es)
>>> {
>>> @@ -117,6 +124,11 @@ static inline void
>>> s390_vec_write_element64(S390Vector *v, uint8_t enr,
>>> v->doubleword[enr] = data;
>>> }
>>> +static inline void s390_vec_write_element128(S390Vector *v, Int128
>>> data)
>>> +{
>>> + v->quadword = data;
>>> +}
>>> +
>>> static inline void s390_vec_write_element(S390Vector *v, uint8_t enr,
>>> uint8_t es, uint64_t data)
>>> {
>>> diff --git a/target/s390x/tcg/vec_helper.c b/target/s390x/tcg/
>>> vec_helper.c
>>> index 98eecd9fde6..a86819f61b6 100644
>>> --- a/target/s390x/tcg/vec_helper.c
>>> +++ b/target/s390x/tcg/vec_helper.c
>>> @@ -48,14 +48,11 @@ void HELPER(vll)(CPUS390XState *env, void *v1,
>>> uint64_t addr, uint64_t bytes)
>>> MemOpIdx oi;
>>> if (likely(bytes >= 16)) {
>>> - uint64_t t0, t1;
>>> + Int128 t;
>>> - oi = make_memop_idx(MO_BE | MO_64 | MO_UNALN, mmu_idx);
>>> - t0 = cpu_ldq_mmu(env, addr, oi, ra);
>>> - addr = wrap_address(env, addr + 8);
>>> - t1 = cpu_ldq_mmu(env, addr, oi, ra);
>>> - s390_vec_write_element64(v1, 0, t0);
>>> - s390_vec_write_element64(v1, 1, t1);
>>> + oi = make_memop_idx(MO_128 | MO_UNALN | MO_ATOM_NONE |
>>> MO_BE, mmu_idx);
>>> + t = cpu_ld16_mmu(env, addr, oi, ra);
>>> + s390_vec_write_element128(v1, t);
>>> } else {
>>> S390Vector tmp = {};
>>> @@ -206,10 +203,8 @@ void HELPER(vstl)(CPUS390XState *env, const
>>> void *v1, uint64_t addr,
>>> probe_write_access(env, addr, MIN(bytes, 16), GETPC());
>>> if (likely(bytes >= 16)) {
>>> - oi = make_memop_idx(MO_BE | MO_64 | MO_UNALN, mmu_idx);
>>> - cpu_stq_mmu(env, addr, s390_vec_read_element64(v1, 0), oi,
>>> ra);
>>> - addr = wrap_address(env, addr + 8);
>>> - cpu_stq_mmu(env, addr, s390_vec_read_element64(v1, 1), oi,
>>> ra);
>>> + oi = make_memop_idx(MO_128 | MO_UNALN | MO_ATOM_NONE |
>>> MO_BE, mmu_idx);
>>> + cpu_st16_mmu(env, addr, s390_vec_read_element128(v1), oi, ra);
>>> } else {
>>> oi = make_memop_idx(MO_8, mmu_idx);
>>
>>
>> In general LGTM, but I just tried this on top of
>> 20260423135035.50126-1- philmd@linaro.org and I'm consistently getting:
>>
>>
>> make[2]: *** [Makefile:236: run-precise-smc-softmmu] Error 1
>>
>>
>> The test uses VSTL, so it's definitely related.
>
> Is this on a big-endian host? (Asking because this is the single
> configuration I didn't tested)
No, that's my x86_64 laptop.
Configure flags: '--target-list=s390x-linux-user,s390x-softmmu'
'--disable-docs'
$ git log -1
commit 2fc3bdf329c54489438408161ebc907a6fa62329 (HEAD)
Author: Philippe Mathieu-Daudé <philmd@linaro.org>
Date: Fri Apr 24 12:43:25 2026 +0200
target/s390x: Leverage 128-bit vector type in V{L, ST}L opcodes
$ uname -a && make -j24 && make -j24 check-tcg ; echo $?
Linux heavy 6.19.10-100.fc42.x86_64 #1 SMP PREEMPT_DYNAMIC Wed Mar 25
17:18:37 UTC 2026 x86_64 GNU/Linux
[...]
make[2]: *** [Makefile:236: run-precise-smc-softmmu] Error 1
$ git revert HEAD
$ uname -a && make -j24 && make -j24 check-tcg ; echo $?
[...]
0
On 4/24/26 20:43, Philippe Mathieu-Daudé wrote: > The access is unaligned (MO_UNALN), we can easily replace the 2 > consecutive 64-bit calls by a single 128-bit one. > Considering atomicity of the new 128-bit access, the both opcodes > VLL (Vector Load with Length) and VSTL (Vector STore with Length) > access byte granularity, so use MO_ATOM_NONE. > > Signed-off-by: Philippe Mathieu-Daudé<philmd@linaro.org> > --- > Based-on:<20260423135035.50126-1-philmd@linaro.org> > > v3: > - do not increase addr (Ilya) > - MO_ATOM_NONE (Richard) > --- > target/s390x/tcg/vec.h | 12 ++++++++++++ > target/s390x/tcg/vec_helper.c | 17 ++++++----------- > 2 files changed, 18 insertions(+), 11 deletions(-) Reviewed-by: Richard Henderson <richard.henderson@linaro.org> r~
© 2016 - 2026 Red Hat, Inc.