Moving the following instructions to decodetree specification:
mulli : D-form
mul{lw, lwo, hw, hwu}[.] : XO-form
The changes were verified by validating that the tcg ops generated by those
instructions remain the same, as captured with the '-d in_asm,op' flag.
Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
---
target/ppc/insn32.decode | 9 +++
target/ppc/translate.c | 89 ----------------------
target/ppc/translate/fixedpoint-impl.c.inc | 71 +++++++++++++++++
3 files changed, 80 insertions(+), 89 deletions(-)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index eada59f59f..0184680db8 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -193,6 +193,9 @@
&XO_ta rt ra oe:bool rc:bool
@XO_ta ...... rt:5 ra:5 ..... oe:1 ......... rc:1 &XO_ta
+&XO_tab_rc rt ra rb rc:bool
+@XO_tab_rc ...... rt:5 ra:5 rb:5 . ......... rc:1 &XO_tab_rc
+
%xx_xt 0:1 21:5
%xx_xb 1:1 11:5
%xx_xa 2:1 16:5
@@ -353,6 +356,12 @@ SUBFE 011111 ..... ..... ..... . 010001000 . @XO
SUBFME 011111 ..... ..... ----- . 011101000 . @XO_ta
SUBFZE 011111 ..... ..... ----- . 011001000 . @XO_ta
+MULLI 000111 ..... ..... ................ @D
+MULLW 011111 ..... ..... ..... 0 011101011 . @XO_tab_rc
+MULLWO 011111 ..... ..... ..... 1 011101011 . @XO_tab_rc
+MULHW 011111 ..... ..... ..... - 001001011 . @XO_tab_rc
+MULHWU 011111 ..... ..... ..... - 000001011 . @XO_tab_rc
+
## Fixed-Point Logical Instructions
CFUGED 011111 ..... ..... ..... 0011011100 - @X
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 93ffec787c..c45547a770 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -1948,90 +1948,6 @@ GEN_INT_ARITH_MODD(modud, 0x08, 0);
GEN_INT_ARITH_MODD(modsd, 0x18, 1);
#endif
-/* mulhw mulhw. */
-static void gen_mulhw(DisasContext *ctx)
-{
- TCGv_i32 t0 = tcg_temp_new_i32();
- TCGv_i32 t1 = tcg_temp_new_i32();
-
- tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
- tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
- tcg_gen_muls2_i32(t0, t1, t0, t1);
- tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1);
- if (unlikely(Rc(ctx->opcode) != 0)) {
- gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
- }
-}
-
-/* mulhwu mulhwu. */
-static void gen_mulhwu(DisasContext *ctx)
-{
- TCGv_i32 t0 = tcg_temp_new_i32();
- TCGv_i32 t1 = tcg_temp_new_i32();
-
- tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
- tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
- tcg_gen_mulu2_i32(t0, t1, t0, t1);
- tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1);
- if (unlikely(Rc(ctx->opcode) != 0)) {
- gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
- }
-}
-
-/* mullw mullw. */
-static void gen_mullw(DisasContext *ctx)
-{
-#if defined(TARGET_PPC64)
- TCGv_i64 t0, t1;
- t0 = tcg_temp_new_i64();
- t1 = tcg_temp_new_i64();
- tcg_gen_ext32s_tl(t0, cpu_gpr[rA(ctx->opcode)]);
- tcg_gen_ext32s_tl(t1, cpu_gpr[rB(ctx->opcode)]);
- tcg_gen_mul_i64(cpu_gpr[rD(ctx->opcode)], t0, t1);
-#else
- tcg_gen_mul_i32(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
- cpu_gpr[rB(ctx->opcode)]);
-#endif
- if (unlikely(Rc(ctx->opcode) != 0)) {
- gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
- }
-}
-
-/* mullwo mullwo. */
-static void gen_mullwo(DisasContext *ctx)
-{
- TCGv_i32 t0 = tcg_temp_new_i32();
- TCGv_i32 t1 = tcg_temp_new_i32();
-
- tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
- tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
- tcg_gen_muls2_i32(t0, t1, t0, t1);
-#if defined(TARGET_PPC64)
- tcg_gen_concat_i32_i64(cpu_gpr[rD(ctx->opcode)], t0, t1);
-#else
- tcg_gen_mov_i32(cpu_gpr[rD(ctx->opcode)], t0);
-#endif
-
- tcg_gen_sari_i32(t0, t0, 31);
- tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1);
- tcg_gen_extu_i32_tl(cpu_ov, t0);
- if (is_isa300(ctx)) {
- tcg_gen_mov_tl(cpu_ov32, cpu_ov);
- }
- tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
-
- if (unlikely(Rc(ctx->opcode) != 0)) {
- gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
- }
-}
-
-/* mulli */
-static void gen_mulli(DisasContext *ctx)
-{
- tcg_gen_muli_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
- SIMM(ctx->opcode));
-}
-
#if defined(TARGET_PPC64)
/* mulhd mulhd. */
static void gen_mulhd(DisasContext *ctx)
@@ -6430,11 +6346,6 @@ GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x00600000, PPC_NONE, PPC2_ISA300),
GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x00000001, PPC_NONE, PPC2_ISA205),
GEN_HANDLER_E(cmprb, 0x1F, 0x00, 0x06, 0x00400001, PPC_NONE, PPC2_ISA300),
GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x00000001, PPC_ISEL),
-GEN_HANDLER(mulhw, 0x1F, 0x0B, 0x02, 0x00000400, PPC_INTEGER),
-GEN_HANDLER(mulhwu, 0x1F, 0x0B, 0x00, 0x00000400, PPC_INTEGER),
-GEN_HANDLER(mullw, 0x1F, 0x0B, 0x07, 0x00000000, PPC_INTEGER),
-GEN_HANDLER(mullwo, 0x1F, 0x0B, 0x17, 0x00000000, PPC_INTEGER),
-GEN_HANDLER(mulli, 0x07, 0xFF, 0xFF, 0x00000000, PPC_INTEGER),
#if defined(TARGET_PPC64)
GEN_HANDLER(mulld, 0x1F, 0x09, 0x07, 0x00000000, PPC_64B),
#endif
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc
index 0c66465d96..e12e533c67 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -395,6 +395,77 @@ TRANS(SUBFE, do_subf_XO, true, true)
TRANS(SUBFME, do_subf_const_XO, tcg_constant_tl(-1LL), true, true)
TRANS(SUBFZE, do_subf_const_XO, tcg_constant_tl(0), true, true)
+static bool trans_MULLI(DisasContext *ctx, arg_MULLI *a)
+{
+ tcg_gen_muli_tl(cpu_gpr[a->rt], cpu_gpr[a->ra], a->si);
+ return true;
+}
+
+static bool trans_MULLW(DisasContext *ctx, arg_MULLW *a)
+{
+#if defined(TARGET_PPC64)
+ TCGv_i64 t0, t1;
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ tcg_gen_ext32s_tl(t0, cpu_gpr[a->ra]);
+ tcg_gen_ext32s_tl(t1, cpu_gpr[a->rb]);
+ tcg_gen_mul_i64(cpu_gpr[a->rt], t0, t1);
+#else
+ tcg_gen_mul_i32(cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb]);
+#endif
+ if (unlikely(a->rc)) {
+ gen_set_Rc0(ctx, cpu_gpr[a->rt]);
+ }
+ return true;
+}
+
+static bool trans_MULLWO(DisasContext *ctx, arg_MULLWO *a)
+{
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+
+ tcg_gen_trunc_tl_i32(t0, cpu_gpr[a->ra]);
+ tcg_gen_trunc_tl_i32(t1, cpu_gpr[a->rb]);
+ tcg_gen_muls2_i32(t0, t1, t0, t1);
+#if defined(TARGET_PPC64)
+ tcg_gen_concat_i32_i64(cpu_gpr[a->rt], t0, t1);
+#else
+ tcg_gen_mov_i32(cpu_gpr[a->rt], t0);
+#endif
+
+ tcg_gen_sari_i32(t0, t0, 31);
+ tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1);
+ tcg_gen_extu_i32_tl(cpu_ov, t0);
+ if (is_isa300(ctx)) {
+ tcg_gen_mov_tl(cpu_ov32, cpu_ov);
+ }
+ tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
+
+ if (unlikely(a->rc)) {
+ gen_set_Rc0(ctx, cpu_gpr[a->rt]);
+ }
+ return true;
+}
+
+static bool do_mulhw(DisasContext *ctx, arg_XO_tab_rc *a,
+ void (*helper)(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1,
+ TCGv_i32 arg2))
+{
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ tcg_gen_trunc_tl_i32(t0, cpu_gpr[a->ra]);
+ tcg_gen_trunc_tl_i32(t1, cpu_gpr[a->rb]);
+ helper(t0, t1, t0, t1);
+ tcg_gen_extu_i32_tl(cpu_gpr[a->rt], t1);
+ if (unlikely(a->rc)) {
+ gen_set_Rc0(ctx, cpu_gpr[a->rt]);
+ }
+ return true;
+}
+
+TRANS(MULHW, do_mulhw, tcg_gen_muls2_i32)
+TRANS(MULHWU, do_mulhw, tcg_gen_mulu2_i32)
+
static bool trans_INVALID(DisasContext *ctx, arg_INVALID *a)
{
gen_invalid(ctx);
--
2.39.3
On 4/15/24 23:39, Chinmay Rath wrote:
> Moving the following instructions to decodetree specification :
> mulli : D-form
> mul{lw, lwo, hw, hwu}[.] : XO-form
>
> The changes were verified by validating that the tcg ops generated by those
> instructions remain the same, which were captured with the '-d in_asm,op' flag.
>
> Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
> ---
> target/ppc/insn32.decode | 9 +++
> target/ppc/translate.c | 89 ----------------------
> target/ppc/translate/fixedpoint-impl.c.inc | 71 +++++++++++++++++
> 3 files changed, 80 insertions(+), 89 deletions(-)
This is an accurate reorg of the current code, so
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
However, as follow-up, the code generation could be cleaned up:
> +static bool trans_MULLW(DisasContext *ctx, arg_MULLW *a)
> +{
> +#if defined(TARGET_PPC64)
> + TCGv_i64 t0, t1;
> + t0 = tcg_temp_new_i64();
> + t1 = tcg_temp_new_i64();
> + tcg_gen_ext32s_tl(t0, cpu_gpr[a->ra]);
> + tcg_gen_ext32s_tl(t1, cpu_gpr[a->rb]);
> + tcg_gen_mul_i64(cpu_gpr[a->rt], t0, t1);
> +#else
> + tcg_gen_mul_i32(cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb]);
> +#endif
> + if (unlikely(a->rc)) {
> + gen_set_Rc0(ctx, cpu_gpr[a->rt]);
> + }
> + return true;
> +}
Without ifdefs:
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
tcg_gen_ext32s_tl(t0, ra);
tcg_gen_ext32s_tl(t1, rb);
tcg_gen_mul_tl(rt, t0, t1);
For ppc32, ext32s_tl will turn into a mov, which will be optimized away. So ideal code
generation for both modes.
> +static bool trans_MULLWO(DisasContext *ctx, arg_MULLWO *a)
> +{
> + TCGv_i32 t0 = tcg_temp_new_i32();
> + TCGv_i32 t1 = tcg_temp_new_i32();
> +
> + tcg_gen_trunc_tl_i32(t0, cpu_gpr[a->ra]);
> + tcg_gen_trunc_tl_i32(t1, cpu_gpr[a->rb]);
> + tcg_gen_muls2_i32(t0, t1, t0, t1);
> +#if defined(TARGET_PPC64)
> + tcg_gen_concat_i32_i64(cpu_gpr[a->rt], t0, t1);
> +#else
> + tcg_gen_mov_i32(cpu_gpr[a->rt], t0);
> +#endif
> +
> + tcg_gen_sari_i32(t0, t0, 31);
> + tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1);
> + tcg_gen_extu_i32_tl(cpu_ov, t0);
Usually hosts need to create the full 64-bit product and then break it apart for
tcg_gen_muls2_i32, so split followed immediately by concatenate isn't great.
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
#ifdef TARGET_PPC64
tcg_gen_ext32s_i64(t0, ra);
tcg_gen_ext32s_i64(t1, rb);
tcg_gen_mul_i64(rt, t0, t1);
tcg_gen_sextract_i64(t0, rt, 31, 1);
tcg_gen_sari_i64(t1, rt, 32);
#else
tcg_gen_muls2_i32(rt, t1, ra, rb);
tcg_gen_sari_i32(t0, rt, 31);
#endif
tcg_gen_setcond_tl(TCG_COND_NE, cpu_ov, t0, t1);
> + if (is_isa300(ctx)) {
> + tcg_gen_mov_tl(cpu_ov32, cpu_ov);
> + }
> + tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
> +
> + if (unlikely(a->rc)) {
> + gen_set_Rc0(ctx, cpu_gpr[a->rt]);
> + }
> + return true;
> +}
r~
Hi Richard,
On 4/16/24 23:26, Richard Henderson wrote:
> On 4/15/24 23:39, Chinmay Rath wrote:
>> Moving the following instructions to decodetree specification :
>> mulli : D-form
>> mul{lw, lwo, hw, hwu}[.] : XO-form
>>
>> The changes were verified by validating that the tcg ops generated by
>> those
>> instructions remain the same, which were captured with the '-d
>> in_asm,op' flag.
>>
>> Signed-off-by: Chinmay Rath <rathc@linux.ibm.com>
>> ---
>> target/ppc/insn32.decode | 9 +++
>> target/ppc/translate.c | 89 ----------------------
>> target/ppc/translate/fixedpoint-impl.c.inc | 71 +++++++++++++++++
>> 3 files changed, 80 insertions(+), 89 deletions(-)
>
> This is an accurate reorg of the current code, so
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Thank you.
>
> However, as follow-up, the code generation could be cleaned up:
>
>
>> +static bool trans_MULLW(DisasContext *ctx, arg_MULLW *a)
>> +{
>> +#if defined(TARGET_PPC64)
>> + TCGv_i64 t0, t1;
>> + t0 = tcg_temp_new_i64();
>> + t1 = tcg_temp_new_i64();
>> + tcg_gen_ext32s_tl(t0, cpu_gpr[a->ra]);
>> + tcg_gen_ext32s_tl(t1, cpu_gpr[a->rb]);
>> + tcg_gen_mul_i64(cpu_gpr[a->rt], t0, t1);
>> +#else
>> + tcg_gen_mul_i32(cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb]);
>> +#endif
>> + if (unlikely(a->rc)) {
>> + gen_set_Rc0(ctx, cpu_gpr[a->rt]);
>> + }
>> + return true;
>> +}
>
> Without ifdefs:
>
> TCGv t0 = tcg_temp_new();
> TCGv t1 = tcg_temp_new();
>
> tcg_gen_ext32s_tl(t0, ra);
> tcg_gen_ext32s_tl(t1, rb);
> tcg_gen_mul_tl(rt, t0, t1);
>
> For ppc32, ext32s_tl will turn into a mov, which will be optimized
> away. So ideal code generation for both modes.
>
>
>> +static bool trans_MULLWO(DisasContext *ctx, arg_MULLWO *a)
>> +{
>> + TCGv_i32 t0 = tcg_temp_new_i32();
>> + TCGv_i32 t1 = tcg_temp_new_i32();
>> +
>> + tcg_gen_trunc_tl_i32(t0, cpu_gpr[a->ra]);
>> + tcg_gen_trunc_tl_i32(t1, cpu_gpr[a->rb]);
>> + tcg_gen_muls2_i32(t0, t1, t0, t1);
>> +#if defined(TARGET_PPC64)
>> + tcg_gen_concat_i32_i64(cpu_gpr[a->rt], t0, t1);
>> +#else
>> + tcg_gen_mov_i32(cpu_gpr[a->rt], t0);
>> +#endif
>> +
>> + tcg_gen_sari_i32(t0, t0, 31);
>> + tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1);
>> + tcg_gen_extu_i32_tl(cpu_ov, t0);
>
> Usually hosts need to create the full 64-bit product and then break it
> apart for tcg_gen_muls2_i32, so split followed immediately by
> concatenate isn't great.
>
>
> TCGv t0 = tcg_temp_new();
> TCGv t1 = tcg_temp_new();
>
> #ifdef TARGET_PPC64
> tcg_gen_ext32s_i64(t0, ra);
> tcg_gen_ext32s_i64(t1, rb);
> tcg_gen_mul_i64(rt, t0, t1);
> tcg_gen_sextract_i64(t0, rt, 31, 1);
> tcg_gen_sari_i64(t1, rt, 32);
> #else
> tcg_gen_muls2_i32(rt, t1, ra, rb);
> tcg_gen_sari_i32(t0, rt, 31);
> #endif
> tcg_gen_setcond_tl(TCG_COND_NE, cpu_ov, t0, t1);
>
Sure, will update in v2.
Thanks,
Chinmay
>
>> + if (is_isa300(ctx)) {
>> + tcg_gen_mov_tl(cpu_ov32, cpu_ov);
>> + }
>> + tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
>> +
>> + if (unlikely(a->rc)) {
>> + gen_set_Rc0(ctx, cpu_gpr[a->rt]);
>> + }
>> + return true;
>> +}
>
>
> r~
>
© 2016 - 2026 Red Hat, Inc.