[PATCH v4 05/16] tcg/s390x: Implement tcg_out_ld/st for vector types

Richard Henderson posted 16 patches 4 years, 7 months ago
There is a newer version of this series
[PATCH v4 05/16] tcg/s390x: Implement tcg_out_ld/st for vector types
Posted by Richard Henderson 4 years, 7 months ago
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/s390x/tcg-target.c.inc | 122 +++++++++++++++++++++++++++++++++----
 1 file changed, 110 insertions(+), 12 deletions(-)

diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 18233c628d..b6ea129e14 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -265,6 +265,12 @@ typedef enum S390Opcode {
     RX_STC      = 0x42,
     RX_STH      = 0x40,
 
+    VRX_VL      = 0xe706,
+    VRX_VLLEZ   = 0xe704,
+    VRX_VST     = 0xe70e,
+    VRX_VSTEF   = 0xe70b,
+    VRX_VSTEG   = 0xe70a,
+
     NOP         = 0x0707,
 } S390Opcode;
 
@@ -529,6 +535,31 @@ static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
 #define tcg_out_insn_RX   tcg_out_insn_RS
 #define tcg_out_insn_RXY  tcg_out_insn_RSY
 
+static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
+{
+    /*
+     * Shift bit 4 of each regno to its corresponding bit of RXB.
+     * RXB itself begins at bit 8 of the instruction so 8 - 4 = 4
+     * is the left-shift of the 4th operand.
+     */
+    return ((v1 & 0x10) << (4 + 3))
+         | ((v2 & 0x10) << (4 + 2))
+         | ((v3 & 0x10) << (4 + 1))
+         | ((v4 & 0x10) << (4 + 0));
+}
+
+static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
+                             TCGReg b2, TCGReg x2, intptr_t d2, int m3)
+{
+    tcg_debug_assert(v1 >= TCG_REG_V0 && v1 <= TCG_REG_V31);
+    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
+    tcg_debug_assert(x2 <= TCG_REG_R15);
+    tcg_debug_assert(b2 <= TCG_REG_R15);
+    tcg_out16(s, (op & 0xff00) | ((v1 & 15) << 4) | x2);
+    tcg_out16(s, (b2 << 12) | d2);
+    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
+}
+
 /* Emit an opcode with "type-checking" of the format.  */
 #define tcg_out_insn(S, FMT, OP, ...) \
     glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
@@ -705,25 +736,92 @@ static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
     }
 }
 
+static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
+                            TCGReg data, TCGReg base, TCGReg index,
+                            tcg_target_long ofs, int m3)
+{
+    if (ofs < 0 || ofs >= 0x1000) {
+        if (ofs >= -0x80000 && ofs < 0x80000) {
+            tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
+            base = TCG_TMP0;
+            index = TCG_REG_NONE;
+            ofs = 0;
+        } else {
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
+            if (index != TCG_REG_NONE) {
+                tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
+            }
+            index = TCG_TMP0;
+            ofs = 0;
+        }
+    }
+    tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
+}
 
 /* load data without address translation or endianness conversion */
-static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
-                              TCGReg base, intptr_t ofs)
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
+                       TCGReg base, intptr_t ofs)
 {
-    if (type == TCG_TYPE_I32) {
-        tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
-    } else {
-        tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
+    switch (type) {
+    case TCG_TYPE_I32:
+        if (likely(data < 16)) {
+            tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
+            break;
+        }
+        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
+        break;
+
+    case TCG_TYPE_I64:
+        if (likely(data < 16)) {
+            tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
+            break;
+        }
+        /* fallthru */
+
+    case TCG_TYPE_V64:
+        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
+        break;
+
+    case TCG_TYPE_V128:
+        /* Hint quadword aligned.  */
+        tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
+        break;
+
+    default:
+        g_assert_not_reached();
     }
 }
 
-static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
-                              TCGReg base, intptr_t ofs)
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
+                       TCGReg base, intptr_t ofs)
 {
-    if (type == TCG_TYPE_I32) {
-        tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
-    } else {
-        tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
+    switch (type) {
+    case TCG_TYPE_I32:
+        if (likely(data < 16)) {
+            tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
+        } else {
+            tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
+        }
+        break;
+
+    case TCG_TYPE_I64:
+        if (likely(data < 16)) {
+            tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
+            break;
+        }
+        /* fallthru */
+
+    case TCG_TYPE_V64:
+        tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
+        break;
+
+    case TCG_TYPE_V128:
+        /* Hint quadword aligned.  */
+        tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
+        break;
+
+    default:
+        g_assert_not_reached();
     }
 }
 
-- 
2.25.1


Re: [PATCH v4 05/16] tcg/s390x: Implement tcg_out_ld/st for vector types
Posted by David Hildenbrand 4 years, 4 months ago
On 26.06.21 07:02, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   tcg/s390x/tcg-target.c.inc | 122 +++++++++++++++++++++++++++++++++----
>   1 file changed, 110 insertions(+), 12 deletions(-)
> 
> diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
> index 18233c628d..b6ea129e14 100644
> --- a/tcg/s390x/tcg-target.c.inc
> +++ b/tcg/s390x/tcg-target.c.inc
> @@ -265,6 +265,12 @@ typedef enum S390Opcode {
>       RX_STC      = 0x42,
>       RX_STH      = 0x40,
>   
> +    VRX_VL      = 0xe706,
> +    VRX_VLLEZ   = 0xe704,
> +    VRX_VST     = 0xe70e,
> +    VRX_VSTEF   = 0xe70b,
> +    VRX_VSTEG   = 0xe70a,
> +
>       NOP         = 0x0707,
>   } S390Opcode;
>   
> @@ -529,6 +535,31 @@ static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
>   #define tcg_out_insn_RX   tcg_out_insn_RS
>   #define tcg_out_insn_RXY  tcg_out_insn_RSY
>   
> +static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
> +{
> +    /*
> +     * Shift bit 4 of each regno to its corresponding bit of RXB.
> +     * RXB itself begins at bit 8 of the instruction so 8 - 4 = 4
> +     * is the left-shift of the 4th operand.
> +     */
> +    return ((v1 & 0x10) << (4 + 3))
> +         | ((v2 & 0x10) << (4 + 2))
> +         | ((v3 & 0x10) << (4 + 1))
> +         | ((v4 & 0x10) << (4 + 0));
> +}
> +
> +static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
> +                             TCGReg b2, TCGReg x2, intptr_t d2, int m3)

Is intptr_t really the right type here? Just curious ... I'd have used 
a uint16_t and asserted "!(d2 & 0xf000)".

> +{
> +    tcg_debug_assert(v1 >= TCG_REG_V0 && v1 <= TCG_REG_V31);
> +    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
> +    tcg_debug_assert(x2 <= TCG_REG_R15);
> +    tcg_debug_assert(b2 <= TCG_REG_R15);
> +    tcg_out16(s, (op & 0xff00) | ((v1 & 15) << 4) | x2);

Nit: ((v1 & 0xf) << 4)

makes it immediately clearer to me which bits are set by which piece of 
this puzzle :)

> +    tcg_out16(s, (b2 << 12) | d2);
> +    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
> +}
> +
>   /* Emit an opcode with "type-checking" of the format.  */
>   #define tcg_out_insn(S, FMT, OP, ...) \
>       glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
> @@ -705,25 +736,92 @@ static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
>       }
>   }
>   
> +static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
> +                            TCGReg data, TCGReg base, TCGReg index,
> +                            tcg_target_long ofs, int m3)
> +{
> +    if (ofs < 0 || ofs >= 0x1000) {
> +        if (ofs >= -0x80000 && ofs < 0x80000) {
> +            tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
> +            base = TCG_TMP0;
> +            index = TCG_REG_NONE;
> +            ofs = 0;
> +        } else {
> +            tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
> +            if (index != TCG_REG_NONE) {
> +                tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
> +            }
> +            index = TCG_TMP0;
> +            ofs = 0;
> +        }
> +    }
> +    tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
> +}
>   
>   /* load data without address translation or endianness conversion */
> -static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
> -                              TCGReg base, intptr_t ofs)
> +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
> +                       TCGReg base, intptr_t ofs)
>   {
> -    if (type == TCG_TYPE_I32) {
> -        tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
> -    } else {
> -        tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
> +    switch (type) {
> +    case TCG_TYPE_I32:
> +        if (likely(data < 16)) {

This actually maps to "if (likely(data <= TCG_REG_R15))", correct?

-- 
Thanks,

David / dhildenb


Re: [PATCH v4 05/16] tcg/s390x: Implement tcg_out_ld/st for vector types
Posted by Richard Henderson 4 years, 4 months ago
On 9/14/21 9:46 AM, David Hildenbrand wrote:
>> +        if (likely(data < 16)) {
> 
> This actually maps to "if (likely(data <= TCG_REG_R15))", correct?

Sure.


r~

Re: [PATCH v4 05/16] tcg/s390x: Implement tcg_out_ld/st for vector types
Posted by Richard Henderson 4 years, 4 months ago
On 9/14/21 3:03 PM, Richard Henderson wrote:
> On 9/14/21 9:46 AM, David Hildenbrand wrote:
>>> +        if (likely(data < 16)) {
>>
>> This actually maps to "if (likely(data <= TCG_REG_R15))", correct?
> 
> Sure.

I'm going to add is_general_reg and is_vector_reg predicates.


r~


Re: [PATCH v4 05/16] tcg/s390x: Implement tcg_out_ld/st for vector types
Posted by David Hildenbrand 4 years, 4 months ago
On 15.09.21 00:14, Richard Henderson wrote:
> On 9/14/21 3:03 PM, Richard Henderson wrote:
>> On 9/14/21 9:46 AM, David Hildenbrand wrote:
>>>> +        if (likely(data < 16)) {
>>>
>>> This actually maps to "if (likely(data <= TCG_REG_R15))", correct?
>>
>> Sure.
> 
> I'm going to add is_general_reg and is_vector_reg predicates.

That will make it easier to grasp, thanks!


-- 
Thanks,

David / dhildenb


Re: [PATCH v4 05/16] tcg/s390x: Implement tcg_out_ld/st for vector types
Posted by Richard Henderson 4 years, 4 months ago
On 9/14/21 9:46 AM, David Hildenbrand wrote:
>> +static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
>> +                             TCGReg b2, TCGReg x2, intptr_t d2, int m3)
> 
> Is intptr_t really the right type here? Just curious ... I'd have used a uint16_t and 
> asserted "!(d2 & 0xf000)".

It does come from upstream, as part of a host address. If you use uint16_t, the assert 
can no longer check that the upper bits are zero, because they have already been truncated away.


r~