From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
Implement the following PowerISA v3.1 instructions:
vdivsw: Vector Divide Signed Word
vdivuw: Vector Divide Unsigned Word
vdivsd: Vector Divide Signed Doubleword
vdivud: Vector Divide Unsigned Doubleword
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
---
target/ppc/insn32.decode | 7 ++++
target/ppc/translate/vmx-impl.c.inc | 59 +++++++++++++++++++++++++++++
2 files changed, 66 insertions(+)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index ac2d3da9a7..597768558b 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -703,3 +703,10 @@ XVTLSBB 111100 ... -- 00010 ..... 111011011 . - @XX2_bf_xb
&XL_s s:uint8_t
@XL_s ......-------------- s:1 .......... - &XL_s
RFEBB 010011-------------- . 0010010010 - @XL_s
+
+## Vector Division Instructions
+
+VDIVSW 000100 ..... ..... ..... 00110001011 @VX
+VDIVUW 000100 ..... ..... ..... 00010001011 @VX
+VDIVSD 000100 ..... ..... ..... 00111001011 @VX
+VDIVUD 000100 ..... ..... ..... 00011001011 @VX
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index 6101bca3fd..be35d6fdf3 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -3236,6 +3236,65 @@ TRANS(VMULHSD, do_vx_mulh, true , do_vx_vmulhd_i64)
TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64)
TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64)
+#define TRANS_VDIV_VMOD(FLAGS, NAME, VECE, FNI4_FUNC, FNI8_FUNC) \
+static bool trans_##NAME(DisasContext *ctx, arg_VX *a) \
+{ \
+ static const GVecGen3 op = { \
+ .fni4 = FNI4_FUNC, \
+ .fni8 = FNI8_FUNC, \
+ .vece = VECE \
+ }; \
+ \
+ REQUIRE_VECTOR(ctx); \
+ REQUIRE_INSNS_FLAGS2(ctx, FLAGS); \
+ \
+ tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra), \
+ avr_full_offset(a->vrb), 16, 16, &op); \
+ \
+ return true; \
+}
+
+#define DO_VDIV_VMOD(NAME, SZ, DIV, SIGNED) \
+static void NAME(TCGv_i##SZ t, TCGv_i##SZ a, TCGv_i##SZ b) \
+{ \
+ /* \
+ * If N/0 the instruction used by the backend might deliver \
+ * an invalid division signal to the process, so if b = 0 return \
+ * N/1 and if signed instruction, the same for a = int_min, b = -1 \
+ */ \
+ if (SIGNED) { \
+ TCGv_i##SZ t0 = tcg_temp_new_i##SZ(); \
+ TCGv_i##SZ t1 = tcg_temp_new_i##SZ(); \
+ tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t0, a, INT##SZ##_MIN); \
+ tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, -1); \
+ tcg_gen_and_i##SZ(t0, t0, t1); \
+ tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, 0); \
+ tcg_gen_or_i##SZ(t0, t0, t1); \
+ tcg_gen_movi_i##SZ(t1, 0); \
+ tcg_gen_movcond_i##SZ(TCG_COND_NE, b, t0, t1, t0, b); \
+ DIV(t, a, b); \
+ tcg_temp_free_i##SZ(t0); \
+ tcg_temp_free_i##SZ(t1); \
+ } else { \
+ TCGv_i##SZ zero = tcg_constant_i##SZ(0); \
+ TCGv_i##SZ one = tcg_constant_i##SZ(1); \
+ tcg_gen_movcond_i##SZ(TCG_COND_EQ, b, b, zero, one, b); \
+ DIV(t, a, b); \
+ } \
+}
+
+DO_VDIV_VMOD(do_divsw, 32, tcg_gen_div_i32, true)
+DO_VDIV_VMOD(do_divuw, 32, tcg_gen_divu_i32, false)
+DO_VDIV_VMOD(do_divsd, 64, tcg_gen_div_i64, true)
+DO_VDIV_VMOD(do_divud, 64, tcg_gen_divu_i64, false)
+
+TRANS_VDIV_VMOD(ISA310, VDIVSW, MO_32, do_divsw, NULL)
+TRANS_VDIV_VMOD(ISA310, VDIVUW, MO_32, do_divuw, NULL)
+TRANS_VDIV_VMOD(ISA310, VDIVSD, MO_64, NULL, do_divsd)
+TRANS_VDIV_VMOD(ISA310, VDIVUD, MO_64, NULL, do_divud)
+
+#undef DO_VDIV_VMOD
+
#undef GEN_VR_LDX
#undef GEN_VR_STX
#undef GEN_VR_LVE
--
2.31.1
On 4/5/22 12:55, Lucas Mateus Castro(alqotel) wrote:
> From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>
>
> Implement the following PowerISA v3.1 instructions:
> vdivsw: Vector Divide Signed Word
> vdivuw: Vector Divide Unsigned Word
> vdivsd: Vector Divide Signed Doubleword
> vdivud: Vector Divide Unsigned Doubleword
>
> Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
> ---
> target/ppc/insn32.decode | 7 ++++
> target/ppc/translate/vmx-impl.c.inc | 59 +++++++++++++++++++++++++++++
> 2 files changed, 66 insertions(+)
>
> diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
> index ac2d3da9a7..597768558b 100644
> --- a/target/ppc/insn32.decode
> +++ b/target/ppc/insn32.decode
> @@ -703,3 +703,10 @@ XVTLSBB 111100 ... -- 00010 ..... 111011011 . - @XX2_bf_xb
> &XL_s s:uint8_t
> @XL_s ......-------------- s:1 .......... - &XL_s
> RFEBB 010011-------------- . 0010010010 - @XL_s
> +
> +## Vector Division Instructions
> +
> +VDIVSW 000100 ..... ..... ..... 00110001011 @VX
> +VDIVUW 000100 ..... ..... ..... 00010001011 @VX
> +VDIVSD 000100 ..... ..... ..... 00111001011 @VX
> +VDIVUD 000100 ..... ..... ..... 00011001011 @VX
> diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
> index 6101bca3fd..be35d6fdf3 100644
> --- a/target/ppc/translate/vmx-impl.c.inc
> +++ b/target/ppc/translate/vmx-impl.c.inc
> @@ -3236,6 +3236,65 @@ TRANS(VMULHSD, do_vx_mulh, true , do_vx_vmulhd_i64)
> TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64)
> TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64)
>
> +#define TRANS_VDIV_VMOD(FLAGS, NAME, VECE, FNI4_FUNC, FNI8_FUNC) \
> +static bool trans_##NAME(DisasContext *ctx, arg_VX *a) \
> +{ \
> + static const GVecGen3 op = { \
> + .fni4 = FNI4_FUNC, \
> + .fni8 = FNI8_FUNC, \
> + .vece = VECE \
> + }; \
> + \
> + REQUIRE_VECTOR(ctx); \
> + REQUIRE_INSNS_FLAGS2(ctx, FLAGS); \
> + \
> + tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra), \
> + avr_full_offset(a->vrb), 16, 16, &op); \
> + \
> + return true; \
> +}
Better to use a standalone helper and TRANS() -- the op structure doesn't *need* to be
static const.
> +
> +#define DO_VDIV_VMOD(NAME, SZ, DIV, SIGNED) \
> +static void NAME(TCGv_i##SZ t, TCGv_i##SZ a, TCGv_i##SZ b) \
> +{ \
> + /* \
> + * If N/0 the instruction used by the backend might deliver \
> + * an invalid division signal to the process, so if b = 0 return \
> + * N/1 and if signed instruction, the same for a = int_min, b = -1 \
> + */ \
> + if (SIGNED) { \
> + TCGv_i##SZ t0 = tcg_temp_new_i##SZ(); \
> + TCGv_i##SZ t1 = tcg_temp_new_i##SZ(); \
> + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t0, a, INT##SZ##_MIN); \
> + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, -1); \
> + tcg_gen_and_i##SZ(t0, t0, t1); \
> + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b, 0); \
> + tcg_gen_or_i##SZ(t0, t0, t1); \
> + tcg_gen_movi_i##SZ(t1, 0); \
> + tcg_gen_movcond_i##SZ(TCG_COND_NE, b, t0, t1, t0, b); \
> + DIV(t, a, b); \
> + tcg_temp_free_i##SZ(t0); \
> + tcg_temp_free_i##SZ(t1); \
> + } else { \
> + TCGv_i##SZ zero = tcg_constant_i##SZ(0); \
> + TCGv_i##SZ one = tcg_constant_i##SZ(1); \
> + tcg_gen_movcond_i##SZ(TCG_COND_EQ, b, b, zero, one, b); \
> + DIV(t, a, b); \
> + } \
> +}
This is overkill. Even if you keep some macros, passing in SIGNED and using it in the
outermost if is a sign you should split the macro in two.
However, only tcg_gen_div_i64 really requires the full signed treatment; tcg_gen_div_i32
can be better handled by extending to i64, because INT32_MIN / -1ULL does not trap.
I think this would be much easier to read as 4 separate functions.
r~
On 11/04/2022 22:51, Richard Henderson wrote:
>
> On 4/5/22 12:55, Lucas Mateus Castro(alqotel) wrote:
>
>> +
>> +#define DO_VDIV_VMOD(NAME, SZ, DIV,
>> SIGNED) \
>> +static void NAME(TCGv_i##SZ t, TCGv_i##SZ a, TCGv_i##SZ
>> b) \
>> +{ \
>> + /* \
>> + * If N/0 the instruction used by the backend might
>> deliver \
>> + * an invalid division signal to the process, so if b = 0
>> return \
>> + * N/1 and if signed instruction, the same for a = int_min, b =
>> -1 \
>> + */ \
>> + if (SIGNED)
>> { \
>> + TCGv_i##SZ t0 =
>> tcg_temp_new_i##SZ(); \
>> + TCGv_i##SZ t1 =
>> tcg_temp_new_i##SZ(); \
>> + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t0, a,
>> INT##SZ##_MIN); \
>> + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b,
>> -1); \
>> + tcg_gen_and_i##SZ(t0, t0,
>> t1); \
>> + tcg_gen_setcondi_i##SZ(TCG_COND_EQ, t1, b,
>> 0); \
>> + tcg_gen_or_i##SZ(t0, t0,
>> t1); \
>> + tcg_gen_movi_i##SZ(t1,
>> 0); \
>> + tcg_gen_movcond_i##SZ(TCG_COND_NE, b, t0, t1, t0,
>> b); \
>> + DIV(t, a,
>> b); \
>> + tcg_temp_free_i##SZ(t0); \
>> + tcg_temp_free_i##SZ(t1); \
>> + } else
>> { \
>> + TCGv_i##SZ zero =
>> tcg_constant_i##SZ(0); \
>> + TCGv_i##SZ one =
>> tcg_constant_i##SZ(1); \
>> + tcg_gen_movcond_i##SZ(TCG_COND_EQ, b, b, zero, one,
>> b); \
>> + DIV(t, a,
>> b); \
>> + } \
>> +}
>
> This is overkill. Even if you keep some macros, passing in SIGNED and
> using it in the
> outermost if is a sign you should split the macro in two.
>
> However, only tcg_gen_div_i64 really requires the full signed
> treatment; tcg_gen_div_i32
> can be better handled by extending to i64, because INT32_MIN / -1ULL
> does not trap.
>
> I think this would be much easier to read as 4 separate functions.
>
>
Ok, I'll change it to 4 different macros, move clz128 to int128.h and
turn TRANS_VDIV_VMOD into do_vdiv_vmod function and call it with TRANS()
in v3
> r~
--
Lucas Mateus M. Araujo e Castro
Instituto de Pesquisas ELDORADO
<https://www.eldorado.org.br/?utm_campaign=assinatura_de_e-mail&utm_medium=email&utm_source=RD+Station>
Departamento Computação Embarcada
Analista de Software Trainee
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
© 2016 - 2026 Red Hat, Inc.