[v1] target/ppc: Implement Vector Expand/Extract Mask and Vector Mask

[PATCH 2/3] target/ppc: Implement Vector Extract Mask

Posted by matheus.ferst@eldorado.org.br 4 years, 3 months ago

From: Matheus Ferst <matheus.ferst@eldorado.org.br>

Implement the following PowerISA v3.1 instructions:
vextractbm: Vector Extract Byte Mask
vextracthm: Vector Extract Halfword Mask
vextractwm: Vector Extract Word Mask
vextractdm: Vector Extract Doubleword Mask
vextractqm: Vector Extract Quadword Mask

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
---
 target/ppc/insn32.decode            |  6 ++
 target/ppc/translate/vmx-impl.c.inc | 85 +++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 9a28f1d266..639ac22bf0 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -419,6 +419,12 @@ VEXPANDWM       000100 ..... 00010 ..... 11001000010    @VX_tb
 VEXPANDDM       000100 ..... 00011 ..... 11001000010    @VX_tb
 VEXPANDQM       000100 ..... 00100 ..... 11001000010    @VX_tb
 
+VEXTRACTBM      000100 ..... 01000 ..... 11001000010    @VX_tb
+VEXTRACTHM      000100 ..... 01001 ..... 11001000010    @VX_tb
+VEXTRACTWM      000100 ..... 01010 ..... 11001000010    @VX_tb
+VEXTRACTDM      000100 ..... 01011 ..... 11001000010    @VX_tb
+VEXTRACTQM      000100 ..... 01100 ..... 11001000010    @VX_tb
+
 # VSX Load/Store Instructions
 
 LXV             111101 ..... ..... ............ . 001   @DQ_TSX
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index 58aca58f0f..c6a30614fb 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -1539,6 +1539,91 @@ static bool trans_VEXPANDQM(DisasContext *ctx, arg_VX_tb *a)
     return true;
 }
 
+static bool do_vextractm(DisasContext *ctx, arg_VX_tb *a, unsigned vece)
+{
+    const uint64_t elem_length = 8 << vece, elem_num = 15 >> vece;
+    int i = elem_num;
+    uint64_t bit;
+    TCGv_i64 t, b, tmp, zero;
+
+    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+    REQUIRE_VECTOR(ctx);
+
+    t = tcg_const_i64(0);
+    b = tcg_temp_new_i64();
+    tmp = tcg_temp_new_i64();
+    zero = tcg_constant_i64(0);
+
+    get_avr64(b, a->vrb, true);
+    for (bit = 1ULL << 63; i > elem_num / 2; i--, bit >>= elem_length) {
+        tcg_gen_shli_i64(t, t, 1);
+        tcg_gen_andi_i64(tmp, b, bit);
+        tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, zero);
+        tcg_gen_or_i64(t, t, tmp);
+    }
+
+    get_avr64(b, a->vrb, false);
+    for (bit = 1ULL << 63; i >= 0; i--, bit >>= elem_length) {
+        tcg_gen_shli_i64(t, t, 1);
+        tcg_gen_andi_i64(tmp, b, bit);
+        tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, zero);
+        tcg_gen_or_i64(t, t, tmp);
+    }
+
+    tcg_gen_trunc_i64_tl(cpu_gpr[a->vrt], t);
+
+    tcg_temp_free_i64(t);
+    tcg_temp_free_i64(b);
+    tcg_temp_free_i64(tmp);
+
+    return true;
+}
+
+TRANS(VEXTRACTBM, do_vextractm, MO_8)
+TRANS(VEXTRACTHM, do_vextractm, MO_16)
+TRANS(VEXTRACTWM, do_vextractm, MO_32)
+
+static bool trans_VEXTRACTDM(DisasContext *ctx, arg_VX_tb *a)
+{
+    TCGv_i64 t, b;
+
+    t = tcg_temp_new_i64();
+    b = tcg_temp_new_i64();
+
+    get_avr64(b, a->vrb, true);
+    tcg_gen_andi_i64(t, b, 1);
+    tcg_gen_shli_i64(t, t, 1);
+
+    get_avr64(b, a->vrb, false);
+    tcg_gen_andi_i64(b, b, 1);
+    tcg_gen_or_i64(t, t, b);
+
+    tcg_gen_trunc_i64_tl(cpu_gpr[a->vrt], t);
+
+    tcg_temp_free_i64(t);
+    tcg_temp_free_i64(b);
+
+    return true;
+}
+
+static bool trans_VEXTRACTQM(DisasContext *ctx, arg_VX_tb *a)
+{
+    TCGv_i64 tmp;
+
+    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+    REQUIRE_VECTOR(ctx);
+
+    tmp = tcg_temp_new_i64();
+
+    get_avr64(tmp, a->vrb, true);
+    tcg_gen_shri_i64(tmp, tmp, 63);
+    tcg_gen_trunc_i64_tl(cpu_gpr[a->vrt], tmp);
+
+    tcg_temp_free_i64(tmp);
+
+    return true;
+}
+
 #define GEN_VAFORM_PAIRED(name0, name1, opc2)                           \
 static void glue(gen_, name0##_##name1)(DisasContext *ctx)              \
     {                                                                   \
-- 
2.25.1

Re: [PATCH 2/3] target/ppc: Implement Vector Extract Mask

Posted by Richard Henderson 4 years, 3 months ago

On 11/10/21 7:56 PM, matheus.ferst@eldorado.org.br wrote:
> From: Matheus Ferst <matheus.ferst@eldorado.org.br>
> 
> Implement the following PowerISA v3.1 instructions:
> vextractbm: Vector Extract Byte Mask
> vextracthm: Vector Extract Halfword Mask
> vextractwm: Vector Extract Word Mask
> vextractdm: Vector Extract Doubleword Mask
> vextractqm: Vector Extract Quadword Mask
> 
> Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
> ---
>   target/ppc/insn32.decode            |  6 ++
>   target/ppc/translate/vmx-impl.c.inc | 85 +++++++++++++++++++++++++++++
>   2 files changed, 91 insertions(+)
> 
> diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
> index 9a28f1d266..639ac22bf0 100644
> --- a/target/ppc/insn32.decode
> +++ b/target/ppc/insn32.decode
> @@ -419,6 +419,12 @@ VEXPANDWM       000100 ..... 00010 ..... 11001000010    @VX_tb
>   VEXPANDDM       000100 ..... 00011 ..... 11001000010    @VX_tb
>   VEXPANDQM       000100 ..... 00100 ..... 11001000010    @VX_tb
>   
> +VEXTRACTBM      000100 ..... 01000 ..... 11001000010    @VX_tb
> +VEXTRACTHM      000100 ..... 01001 ..... 11001000010    @VX_tb
> +VEXTRACTWM      000100 ..... 01010 ..... 11001000010    @VX_tb
> +VEXTRACTDM      000100 ..... 01011 ..... 11001000010    @VX_tb
> +VEXTRACTQM      000100 ..... 01100 ..... 11001000010    @VX_tb
> +
>   # VSX Load/Store Instructions
>   
>   LXV             111101 ..... ..... ............ . 001   @DQ_TSX
> diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
> index 58aca58f0f..c6a30614fb 100644
> --- a/target/ppc/translate/vmx-impl.c.inc
> +++ b/target/ppc/translate/vmx-impl.c.inc
> @@ -1539,6 +1539,91 @@ static bool trans_VEXPANDQM(DisasContext *ctx, arg_VX_tb *a)
>       return true;
>   }
>   
> +static bool do_vextractm(DisasContext *ctx, arg_VX_tb *a, unsigned vece)
> +{
> +    const uint64_t elem_length = 8 << vece, elem_num = 15 >> vece;
> +    int i = elem_num;
> +    uint64_t bit;
> +    TCGv_i64 t, b, tmp, zero;
> +
> +    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
> +    REQUIRE_VECTOR(ctx);
> +
> +    t = tcg_const_i64(0);
> +    b = tcg_temp_new_i64();
> +    tmp = tcg_temp_new_i64();
> +    zero = tcg_constant_i64(0);
> +
> +    get_avr64(b, a->vrb, true);
> +    for (bit = 1ULL << 63; i > elem_num / 2; i--, bit >>= elem_length) {
> +        tcg_gen_shli_i64(t, t, 1);
> +        tcg_gen_andi_i64(tmp, b, bit);
> +        tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, zero);
> +        tcg_gen_or_i64(t, t, tmp);
> +    }

This is over-complicated.  Shift b into the correct position, isolate the correct bit, or 
it into the result.

     int ele_width = 8 << vece;
     int ele_count_half = 8 >> vece;

     tcg_gen_movi_i64(r, 0);
     for (int w = 0; w < 2; w++) {
         get_avr64(v, a->vrb, w);

         for (int i = 0; i < ele_count_half; ++i) {
             int b_in = i * ele_width - 1;
             int b_out = w * ele_count_half + i;

             tcg_gen_shri_i64(t, v, b_in - b_out);
             tcg_gen_andi_i64(t, t, 1 << b_out);
             tcg_gen_or_i64(r, r, t);
         }
     }
     tcg_gen_trunc_i64_tl(gpr, r);


> +TRANS(VEXTRACTBM, do_vextractm, MO_8)
> +TRANS(VEXTRACTHM, do_vextractm, MO_16)
> +TRANS(VEXTRACTWM, do_vextractm, MO_32)
> +
> +static bool trans_VEXTRACTDM(DisasContext *ctx, arg_VX_tb *a)

Should be able to use the common routine above as well.


r~

[PATCH 1/3] target/ppc: Implement Vector Expand Mask
[PATCH 2/3] target/ppc: Implement Vector Extract Mask
[PATCH 3/3] target/ppc: Implement Vector Mask Move insns