[PATCH v3 15/37] target/ppc: implement vclrlb

matheus.ferst@eldorado.org.br posted 37 patches 3 years, 12 months ago
Maintainers: Greg Kurz <groug@kaod.org>, Richard Henderson <richard.henderson@linaro.org>, Daniel Henrique Barboza <danielhb413@gmail.com>, David Gibson <david@gibson.dropbear.id.au>, "Cédric Le Goater" <clg@kaod.org>
There is a newer version of this series
[PATCH v3 15/37] target/ppc: implement vclrlb
Posted by matheus.ferst@eldorado.org.br 3 years, 12 months ago
From: Matheus Ferst <matheus.ferst@eldorado.org.br>

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
---
 target/ppc/insn32.decode            |  2 ++
 target/ppc/translate/vmx-impl.c.inc | 56 +++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index ea497ecd80..483651cf9c 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -501,6 +501,8 @@ VSTRIBR         000100 ..... 00001 ..... . 0000001101   @VX_tb_rc
 VSTRIHL         000100 ..... 00010 ..... . 0000001101   @VX_tb_rc
 VSTRIHR         000100 ..... 00011 ..... . 0000001101   @VX_tb_rc
 
+VCLRLB          000100 ..... ..... ..... 00110001101    @VX
+
 # VSX Load/Store Instructions
 
 LXV             111101 ..... ..... ............ . 001   @DQ_TSX
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index 8bcf637ff8..3fb4935bff 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -1956,6 +1956,62 @@ TRANS(VSTRIBR, do_vstri, gen_helper_VSTRIBR)
 TRANS(VSTRIHL, do_vstri, gen_helper_VSTRIHL)
 TRANS(VSTRIHR, do_vstri, gen_helper_VSTRIHR)
 
+static bool trans_VCLRLB(DisasContext *ctx, arg_VX *a)
+{
+    TCGv_i64 hi, lo, rb;
+    TCGLabel *l, *end;
+
+    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+    REQUIRE_VECTOR(ctx);
+
+    l = gen_new_label();
+    end = gen_new_label();
+
+    hi = tcg_const_local_i64(0);
+    lo = tcg_const_local_i64(0);
+    rb = tcg_temp_local_new_i64();
+
+    tcg_gen_extu_tl_i64(rb, cpu_gpr[a->vrb]);
+
+    /* RB == 0: result is all zeros (hi and lo still hold their initial 0) */
+    tcg_gen_brcondi_i64(TCG_COND_EQ, rb, 0, end);
+
+    get_avr64(lo, a->vra, false);
+
+    /* RB <= 8: only lo is needed, hi keeps its initial zero */
+    tcg_gen_brcondi_i64(TCG_COND_LEU, rb, 8, l);
+
+    get_avr64(hi, a->vra, true);
+
+    /* RB >= 16: just copy VRA to VRT */
+    tcg_gen_brcondi_i64(TCG_COND_GEU, rb, 16, end);
+
+    /* 8 < RB < 16: keep lo, clear the upper (16 - RB) bytes of hi */
+    tcg_gen_subfi_i64(rb, 16, rb);
+    tcg_gen_shli_i64(rb, rb, 3);
+    tcg_gen_shl_i64(hi, hi, rb);
+    tcg_gen_shr_i64(hi, hi, rb);
+    tcg_gen_br(end);
+
+    /* 0 < RB <= 8: hi stays zero, clear the upper (8 - RB) bytes of lo */
+    gen_set_label(l);
+    tcg_gen_subfi_i64(rb, 8, rb);
+    tcg_gen_shli_i64(rb, rb, 3);
+    tcg_gen_shl_i64(lo, lo, rb);
+    tcg_gen_shr_i64(lo, lo, rb);
+
+    /* Update VRT */
+    gen_set_label(end);
+    set_avr64(a->vrt, hi, true);
+    set_avr64(a->vrt, lo, false);
+
+    tcg_temp_free_i64(hi);
+    tcg_temp_free_i64(lo);
+    tcg_temp_free_i64(rb);
+
+    return true;
+}
+
 #define GEN_VAFORM_PAIRED(name0, name1, opc2)                           \
 static void glue(gen_, name0##_##name1)(DisasContext *ctx)              \
     {                                                                   \
-- 
2.31.1


Re: [PATCH v3 15/37] target/ppc: implement vclrlb
Posted by Richard Henderson 3 years, 12 months ago
On 2/10/22 23:34, matheus.ferst@eldorado.org.br wrote:
> From: Matheus Ferst <matheus.ferst@eldorado.org.br>
> 
> Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
> ---
>   target/ppc/insn32.decode            |  2 ++
>   target/ppc/translate/vmx-impl.c.inc | 56 +++++++++++++++++++++++++++++
>   2 files changed, 58 insertions(+)
> 
> diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
> index ea497ecd80..483651cf9c 100644
> --- a/target/ppc/insn32.decode
> +++ b/target/ppc/insn32.decode
> @@ -501,6 +501,8 @@ VSTRIBR         000100 ..... 00001 ..... . 0000001101   @VX_tb_rc
>   VSTRIHL         000100 ..... 00010 ..... . 0000001101   @VX_tb_rc
>   VSTRIHR         000100 ..... 00011 ..... . 0000001101   @VX_tb_rc
>   
> +VCLRLB          000100 ..... ..... ..... 00110001101    @VX
> +
>   # VSX Load/Store Instructions
>   
>   LXV             111101 ..... ..... ............ . 001   @DQ_TSX
> diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
> index 8bcf637ff8..3fb4935bff 100644
> --- a/target/ppc/translate/vmx-impl.c.inc
> +++ b/target/ppc/translate/vmx-impl.c.inc
> @@ -1956,6 +1956,62 @@ TRANS(VSTRIBR, do_vstri, gen_helper_VSTRIBR)
>   TRANS(VSTRIHL, do_vstri, gen_helper_VSTRIHL)
>   TRANS(VSTRIHR, do_vstri, gen_helper_VSTRIHR)
>   
> +static bool trans_VCLRLB(DisasContext *ctx, arg_VX *a)
> +{
> +    TCGv_i64 hi, lo, rb;
> +    TCGLabel *l, *end;
> +
> +    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
> +    REQUIRE_VECTOR(ctx);
> +
> +    l = gen_new_label();
> +    end = gen_new_label();
> +
> +    hi = tcg_const_local_i64(0);
> +    lo = tcg_const_local_i64(0);
> +    rb = tcg_temp_local_new_i64();
> +
> +    tcg_gen_extu_tl_i64(rb, cpu_gpr[a->vrb]);
> +
> +    /* RB == 0: all zeros */
> +    tcg_gen_brcondi_i64(TCG_COND_EQ, rb, 0, end);
> +
> +    get_avr64(lo, a->vra, false);
> +
> +    /* RB <= 8 */
> +    tcg_gen_brcondi_i64(TCG_COND_LEU, rb, 8, l);
> +
> +    get_avr64(hi, a->vra, true);
> +
> +    /* RB >= 16: just copy VRA to VRB */
> +    tcg_gen_brcondi_i64(TCG_COND_GEU, rb, 16, end);
> +
> +    /* 8 < RB < 16: copy lo and partially clear hi */
> +    tcg_gen_subfi_i64(rb, 16, rb);
> +    tcg_gen_shli_i64(rb, rb, 3);
> +    tcg_gen_shl_i64(hi, hi, rb);
> +    tcg_gen_shr_i64(hi, hi, rb);
> +    tcg_gen_br(end);
> +
> +    /* 0 < RB <= 8: zeroes hi and partially clears lo */
> +    gen_set_label(l);
> +    tcg_gen_subfi_i64(rb, 8, rb);
> +    tcg_gen_shli_i64(rb, rb, 3);
> +    tcg_gen_shl_i64(lo, lo, rb);
> +    tcg_gen_shr_i64(lo, lo, rb);

There's a bit of redundancy here, and if we exploit that we can remove the branches.

Compute the mask modulo 8.  That result applies to either the first or second word, or 
neither.  Use 3 movcond to select among the cases:

    sh = (rb & 7) << 3;
    mask = ~(-1 << sh);
    ml = rb < 8 ? mask : -1;
    mh = rb < 8 ? 0 : mask;
    mh = rb < 16 ? mh : -1;
    lo &= ml;
    hi &= mh;


r~