[v2] VDIV/VMOD Implementation

[PATCH v2 2/9] target/ppc: Implemented vector divide instructions

Posted by Lucas Mateus Castro(alqotel) 3 years, 10 months ago

From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>

Implement the following PowerISA v3.1 instructions:
vdivsw: Vector Divide Signed Word
vdivuw: Vector Divide Unsigned Word
vdivsd: Vector Divide Signed Doubleword
vdivud: Vector Divide Unsigned Doubleword

Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
---
 target/ppc/insn32.decode            |  7 ++++
 target/ppc/translate/vmx-impl.c.inc | 59 +++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index ac2d3da9a7..597768558b 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -703,3 +703,10 @@ XVTLSBB         111100 ... -- 00010 ..... 111011011 . - @XX2_bf_xb
 &XL_s           s:uint8_t
 @XL_s           ......-------------- s:1 .......... -   &XL_s
 RFEBB           010011-------------- .   0010010010 -   @XL_s
+
+## Vector Division Instructions
+
+VDIVSW          000100 ..... ..... ..... 00110001011    @VX
+VDIVUW          000100 ..... ..... ..... 00010001011    @VX
+VDIVSD          000100 ..... ..... ..... 00111001011    @VX
+VDIVUD          000100 ..... ..... ..... 00011001011    @VX
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index 6101bca3fd..be35d6fdf3 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -3236,6 +3236,65 @@ TRANS(VMULHSD, do_vx_mulh, true , do_vx_vmulhd_i64)
 TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64)
 TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64)
 
+#define TRANS_VDIV_VMOD(FLAGS, NAME, VECE, FNI4_FUNC, FNI8_FUNC)        \
+static bool trans_##NAME(DisasContext *ctx, arg_VX *a)                  \
+{                                                                       \
+    static const GVecGen3 op = {                                        \
+        .fni4 = FNI4_FUNC,                                              \
+        .fni8 = FNI8_FUNC,                                              \
+        .vece = VECE                                                    \
+    };                                                                  \
+                                                                        \
+    REQUIRE_VECTOR(ctx);                                                \
+    REQUIRE_INSNS_FLAGS2(ctx, FLAGS);                                   \
+                                                                        \
+    tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),    \
+                   avr_full_offset(a->vrb), 16, 16, &op);               \
+                                                                        \
+    return true;                                                        \
+}
+
+#define DO_VDIV_VMOD(NAME, SZ, DIV, SIGNED)                             \
+static void NAME(TCGv_i##SZ t, TCGv_i##SZ a, TCGv_i##SZ b)              \
+{                                                                       \
+    /*                                                                  \
+     *  If N/0 the instruction used by the backend might deliver        \
+     *  an invalid division signal to the process, so if b = 0 return   \
+     *  N/1 and if signed instruction, the same for a = int_min, b = -1 \
+     */                                                                 \
+    if (SIGNED) {                                                       \
+        TCGv_i##SZ t0 = tcg_temp_new_i##SZ();                           \
+        TCGv_i##SZ t1 = tcg_temp_new_i##SZ();                           \
+        tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t0, a, INT##SZ##_MIN);      \
+        tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, -1);                 \
+        tcg_gen_and_i##SZ(t0, t0, t1);                                  \
+        tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, 0);                  \
+        tcg_gen_or_i##SZ(t0, t0, t1);                                   \
+        tcg_gen_movi_i##SZ(t1, 0);                                      \
+        tcg_gen_movcond_i##SZ(TCG_COND_NE, b, t0, t1, t0, b);           \
+        DIV(t, a, b);                                                   \
+        tcg_temp_free_i##SZ(t0);                                        \
+        tcg_temp_free_i##SZ(t1);                                        \
+    } else {                                                            \
+        TCGv_i##SZ zero = tcg_constant_i##SZ(0);                        \
+        TCGv_i##SZ one = tcg_constant_i##SZ(1);                         \
+        tcg_gen_movcond_i##SZ(TCG_COND_EQ, b, b, zero, one, b);         \
+        DIV(t, a, b);                                                   \
+    }                                                                   \
+}
+
+DO_VDIV_VMOD(do_divsw, 32, tcg_gen_div_i32, true)
+DO_VDIV_VMOD(do_divuw, 32, tcg_gen_divu_i32, false)
+DO_VDIV_VMOD(do_divsd, 64, tcg_gen_div_i64, true)
+DO_VDIV_VMOD(do_divud, 64, tcg_gen_divu_i64, false)
+
+TRANS_VDIV_VMOD(ISA310, VDIVSW, MO_32, do_divsw, NULL)
+TRANS_VDIV_VMOD(ISA310, VDIVUW, MO_32, do_divuw, NULL)
+TRANS_VDIV_VMOD(ISA310, VDIVSD, MO_64, NULL, do_divsd)
+TRANS_VDIV_VMOD(ISA310, VDIVUD, MO_64, NULL, do_divud)
+
+#undef DO_VDIV_VMOD
+
 #undef GEN_VR_LDX
 #undef GEN_VR_STX
 #undef GEN_VR_LVE
-- 
2.31.1

Re: [PATCH v2 2/9] target/ppc: Implemented vector divide instructions

Posted by Richard Henderson 3 years, 10 months ago

On 4/5/22 12:55, Lucas Mateus Castro(alqotel) wrote:
> From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
> 
> Implement the following PowerISA v3.1 instructions:
> vdivsw: Vector Divide Signed Word
> vdivuw: Vector Divide Unsigned Word
> vdivsd: Vector Divide Signed Doubleword
> vdivud: Vector Divide Unsigned Doubleword
> 
> Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
> ---
>   target/ppc/insn32.decode            |  7 ++++
>   target/ppc/translate/vmx-impl.c.inc | 59 +++++++++++++++++++++++++++++
>   2 files changed, 66 insertions(+)
> 
> diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
> index ac2d3da9a7..597768558b 100644
> --- a/target/ppc/insn32.decode
> +++ b/target/ppc/insn32.decode
> @@ -703,3 +703,10 @@ XVTLSBB         111100 ... -- 00010 ..... 111011011 . - @XX2_bf_xb
>   &XL_s           s:uint8_t
>   @XL_s           ......-------------- s:1 .......... -   &XL_s
>   RFEBB           010011-------------- .   0010010010 -   @XL_s
> +
> +## Vector Division Instructions
> +
> +VDIVSW          000100 ..... ..... ..... 00110001011    @VX
> +VDIVUW          000100 ..... ..... ..... 00010001011    @VX
> +VDIVSD          000100 ..... ..... ..... 00111001011    @VX
> +VDIVUD          000100 ..... ..... ..... 00011001011    @VX
> diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
> index 6101bca3fd..be35d6fdf3 100644
> --- a/target/ppc/translate/vmx-impl.c.inc
> +++ b/target/ppc/translate/vmx-impl.c.inc
> @@ -3236,6 +3236,65 @@ TRANS(VMULHSD, do_vx_mulh, true , do_vx_vmulhd_i64)
>   TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64)
>   TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64)
>   
> +#define TRANS_VDIV_VMOD(FLAGS, NAME, VECE, FNI4_FUNC, FNI8_FUNC)        \
> +static bool trans_##NAME(DisasContext *ctx, arg_VX *a)                  \
> +{                                                                       \
> +    static const GVecGen3 op = {                                        \
> +        .fni4 = FNI4_FUNC,                                              \
> +        .fni8 = FNI8_FUNC,                                              \
> +        .vece = VECE                                                    \
> +    };                                                                  \
> +                                                                        \
> +    REQUIRE_VECTOR(ctx);                                                \
> +    REQUIRE_INSNS_FLAGS2(ctx, FLAGS);                                   \
> +                                                                        \
> +    tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),    \
> +                   avr_full_offset(a->vrb), 16, 16, &op);               \
> +                                                                        \
> +    return true;                                                        \
> +}

Better to use a standalone helper and TRANS() -- the op structure doesn't *need* to be 
static const.

> +
> +#define DO_VDIV_VMOD(NAME, SZ, DIV, SIGNED)                             \
> +static void NAME(TCGv_i##SZ t, TCGv_i##SZ a, TCGv_i##SZ b)              \
> +{                                                                       \
> +    /*                                                                  \
> +     *  If N/0 the instruction used by the backend might deliver        \
> +     *  an invalid division signal to the process, so if b = 0 return   \
> +     *  N/1 and if signed instruction, the same for a = int_min, b = -1 \
> +     */                                                                 \
> +    if (SIGNED) {                                                       \
> +        TCGv_i##SZ t0 = tcg_temp_new_i##SZ();                           \
> +        TCGv_i##SZ t1 = tcg_temp_new_i##SZ();                           \
> +        tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t0, a, INT##SZ##_MIN);      \
> +        tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, -1);                 \
> +        tcg_gen_and_i##SZ(t0, t0, t1);                                  \
> +        tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, 0);                  \
> +        tcg_gen_or_i##SZ(t0, t0, t1);                                   \
> +        tcg_gen_movi_i##SZ(t1, 0);                                      \
> +        tcg_gen_movcond_i##SZ(TCG_COND_NE, b, t0, t1, t0, b);           \
> +        DIV(t, a, b);                                                   \
> +        tcg_temp_free_i##SZ(t0);                                        \
> +        tcg_temp_free_i##SZ(t1);                                        \
> +    } else {                                                            \
> +        TCGv_i##SZ zero = tcg_constant_i##SZ(0);                        \
> +        TCGv_i##SZ one = tcg_constant_i##SZ(1);                         \
> +        tcg_gen_movcond_i##SZ(TCG_COND_EQ, b, b, zero, one, b);         \
> +        DIV(t, a, b);                                                   \
> +    }                                                                   \
> +}

This is overkill.  Even if you keep some macros, passing in SIGNED and using it in the 
outermost if is a sign you should split the macro in two.

However, only tcg_gen_div_i64 really requires the full signed treatment; tcg_gen_div_i32 
can be better handled by extending to i64, because INT32_MIN / -1ULL does not trap.

I think this would be much easier to read as 4 separate functions.


r~

Re: [PATCH v2 2/9] target/ppc: Implemented vector divide instructions

Posted by Lucas Mateus Martins Araujo e Castro 3 years, 9 months ago

On 11/04/2022 22:51, Richard Henderson wrote:
>
> On 4/5/22 12:55, Lucas Mateus Castro(alqotel) wrote:
>
>> +
>> +#define DO_VDIV_VMOD(NAME, SZ, DIV, 
>> SIGNED)                             \
>> +static void NAME(TCGv_i##SZ t, TCGv_i##SZ a, TCGv_i##SZ 
>> b)              \
>> +{ \
>> + /* \
>> +     *  If N/0 the instruction used by the backend might 
>> deliver        \
>> +     *  an invalid division signal to the process, so if b = 0 
>> return   \
>> +     *  N/1 and if signed instruction, the same for a = int_min, b = 
>> -1 \
>> + */ \
>> +    if (SIGNED) 
>> {                                                       \
>> +        TCGv_i##SZ t0 = 
>> tcg_temp_new_i##SZ();                           \
>> +        TCGv_i##SZ t1 = 
>> tcg_temp_new_i##SZ();                           \
>> +        tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t0, a, 
>> INT##SZ##_MIN);      \
>> +        tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, 
>> -1);                 \
>> +        tcg_gen_and_i##SZ(t0, t0, 
>> t1);                                  \
>> +        tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, 
>> 0);                  \
>> +        tcg_gen_or_i##SZ(t0, t0, 
>> t1);                                   \
>> +        tcg_gen_movi_i##SZ(t1, 
>> 0);                                      \
>> +        tcg_gen_movcond_i##SZ(TCG_COND_NE, b, t0, t1, t0, 
>> b);           \
>> +        DIV(t, a, 
>> b);                                                   \
>> + tcg_temp_free_i##SZ(t0); \
>> + tcg_temp_free_i##SZ(t1); \
>> +    } else 
>> {                                                            \
>> +        TCGv_i##SZ zero = 
>> tcg_constant_i##SZ(0);                        \
>> +        TCGv_i##SZ one = 
>> tcg_constant_i##SZ(1);                         \
>> +        tcg_gen_movcond_i##SZ(TCG_COND_EQ, b, b, zero, one, 
>> b);         \
>> +        DIV(t, a, 
>> b);                                                   \
>> + } \
>> +}
>
> This is overkill.  Even if you keep some macros, passing in SIGNED and 
> using it in the
> outermost if is a sign you should split the macro in two.
>
> However, only tcg_gen_div_i64 really requires the full signed 
> treatment; tcg_gen_div_i32
> can be better handled by extending to i64, because INT32_MIN / -1ULL 
> does not trap.
>
> I think this would be much easier to read as 4 separate functions.
>
>
Ok, I'll change it to 4 different macros, move clz128 to int128.h and 
turn TRANS_VDIV_VMOD into do_vdiv_vmod function and call it with TRANS() 
in v3
> r~
-- 
Lucas Mateus M. Araujo e Castro
Instituto de Pesquisas ELDORADO 
<https://www.eldorado.org.br/?utm_campaign=assinatura_de_e-mail&utm_medium=email&utm_source=RD+Station>
Departamento Computação Embarcada
Analista de Software Trainee
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>