[PATCH v4 29/31] target/ppc: Implement cfuged instruction

matheus.ferst@eldorado.org.br posted 31 patches 4 years, 9 months ago
There is a newer version of this series
[PATCH v4 29/31] target/ppc: Implement cfuged instruction
Posted by matheus.ferst@eldorado.org.br 4 years, 9 months ago
From: Matheus Ferst <matheus.ferst@eldorado.org.br>

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
---
 target/ppc/helper.h                        |  1 +
 target/ppc/insn32.decode                   |  4 +++
 target/ppc/int_helper.c                    | 39 ++++++++++++++++++++++
 target/ppc/translate/fixedpoint-impl.c.inc | 16 +++++++--
 4 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index ea9f2a236c..c517b9f025 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -46,6 +46,7 @@ DEF_HELPER_4(divwe, tl, env, tl, tl, i32)
 DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 DEF_HELPER_3(sraw, tl, env, tl, tl)
+DEF_HELPER_FLAGS_2(cfuged, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 #if defined(TARGET_PPC64)
 DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl)
 DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index d69c0bc14c..64788e2a4b 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -87,6 +87,10 @@ STDUX           011111 ..... ..... ..... 0010110101 -   @X
 ADDI            001110 ..... ..... ................     @D
 ADDIS           001111 ..... ..... ................     @D
 
+## Fixed-Point Logical Instructions
+
+CFUGED          011111 ..... ..... ..... 0011011100 -   @X
+
 ### Move To/From System Register Instructions
 
 SETBC           011111 ..... ..... ----- 0110000000 -   @X_bi
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index a44c2d90ea..d1cfb915ae 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -320,6 +320,45 @@ target_ulong helper_popcntb(target_ulong val)
 }
 #endif
 
+uint64_t helper_cfuged(uint64_t src, uint64_t mask)
+{
+    target_ulong m, left = 0, right = 0;
+    unsigned int n, i = 64;
+    bool bit = 0;
+
+    if (mask == 0 || mask == -1) {
+        return src;
+    }
+
+    while (i) {
+        n = ctz64(mask);
+        if (n > i) {
+            n = i;
+        }
+
+        m = (1ll << n) - 1;
+        if (bit) {
+            right = ror64(right | (src & m), n);
+        } else {
+            left = ror64(left | (src & m), n);
+        }
+
+        src >>= n;
+        mask >>= n;
+        i -= n;
+        bit = !bit;
+        mask = ~mask;
+    }
+
+    if (bit) {
+        n = ctpop64(mask);
+    } else {
+        n = 64 - ctpop64(mask);
+    }
+
+    return left | (right >> n);
+}
+
 /*****************************************************************************/
 /* PowerPC 601 specific instructions (POWER bridge) */
 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc
index 37dd25148c..4617f7356b 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -210,8 +210,8 @@ static bool do_set_bool_cond(DisasContext *ctx, arg_X_bi *a, bool neg, bool rev)
 
     tcg_gen_extu_i32_tl(temp, cpu_crf[a->bi >> 2]);
     tcg_gen_andi_tl(temp, temp, mask);
-    tcg_gen_movcond_tl(a->r?TCG_COND_EQ:TCG_COND_NE, cpu_gpr[a->rt], temp,
-                       tcg_constant_tl(0), tcg_constant_tl(a->n?-1:1),
+    tcg_gen_movcond_tl(rev?TCG_COND_EQ:TCG_COND_NE, cpu_gpr[a->rt], temp,
+                       tcg_constant_tl(0), tcg_constant_tl(neg?-1:1),
                        tcg_constant_tl(0));
     tcg_temp_free(temp);
 
@@ -222,3 +222,15 @@ TRANS(SETBC, do_set_bool_cond, false, false)
 TRANS(SETBCR, do_set_bool_cond, false, true)
 TRANS(SETNBC, do_set_bool_cond, true, false)
 TRANS(SETNBCR, do_set_bool_cond, true, true)
+
+static bool trans_CFUGED(DisasContext *ctx, arg_X *a)
+{
+    REQUIRE_64BIT(ctx);
+    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+#if defined(TARGET_PPC64)
+    gen_helper_cfuged(cpu_gpr[a->ra], cpu_gpr[a->rt], cpu_gpr[a->rb]);
+#else
+    gen_invalid(ctx);
+#endif
+    return true;
+}
-- 
2.25.1


Re: [PATCH v4 29/31] target/ppc: Implement cfuged instruction
Posted by Richard Henderson 4 years, 9 months ago
On 5/12/21 1:54 PM, matheus.ferst@eldorado.org.br wrote:
> +    while (i) {
> +        n = ctz64(mask);
> +        if (n > i) {
> +            n = i;
> +        }
> +
> +        m = (1ll << n) - 1;
> +        if (bit) {
> +            right = ror64(right | (src & m), n);
> +        } else {
> +            left = ror64(left | (src & m), n);
> +        }
> +
> +        src >>= n;
> +        mask >>= n;
> +        i -= n;
> +        bit = !bit;
> +        mask = ~mask;
> +    }
> +
> +    if (bit) {
> +        n = ctpop64(mask);
> +    } else {
> +        n = 64 - ctpop64(mask);
> +    }
> +
> +    return left | (right >> n);
> +}

This doesn't correspond to the algorithm presented in the manual.  Thus this 
requires lots of extra commentary.

I guess I see how you're trying to process blocks at a time, instead of single 
bits at a time.  But I don't think the merging of data into "right" and "left" 
looks right.  I would have expected

     right = (right << n) | (src & m);

and similarly for left.

It doesn't look like that the ctpop at the end is correct, given how mask has 
been modified.  I would have thought that

     n = ctpop64(orig_mask);
     return (left << n) | right;

would be the correct answer.

I could be wrong about the above, but that's what the missing commentary should 
have helped me understand.

> +static bool trans_CFUGED(DisasContext *ctx, arg_X *a)
> +{
> +    REQUIRE_64BIT(ctx);
> +    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
> +#if defined(TARGET_PPC64)
> +    gen_helper_cfuged(cpu_gpr[a->ra], cpu_gpr[a->rt], cpu_gpr[a->rb]);
> +#else
> +    gen_invalid(ctx);
> +#endif
> +    return true;
> +}

Given that this helper will also be used by vcfuged, there's no point in hiding 
it in a TARGET_PPC64 block, and thus you can drop the ifdefs.


r~


Re: [PATCH v4 29/31] target/ppc: Implement cfuged instruction
Posted by Matheus K. Ferst 4 years, 9 months ago
On 13/05/2021 08:31, Richard Henderson wrote:
> On 5/12/21 1:54 PM, matheus.ferst@eldorado.org.br wrote:
>> +    while (i) {
>> +        n = ctz64(mask);
>> +        if (n > i) {
>> +            n = i;
>> +        }
>> +
>> +        m = (1ll << n) - 1;
>> +        if (bit) {
>> +            right = ror64(right | (src & m), n);
>> +        } else {
>> +            left = ror64(left | (src & m), n);
>> +        }
>> +
>> +        src >>= n;
>> +        mask >>= n;
>> +        i -= n;
>> +        bit = !bit;
>> +        mask = ~mask;
>> +    }
>> +
>> +    if (bit) {
>> +        n = ctpop64(mask);
>> +    } else {
>> +        n = 64 - ctpop64(mask);
>> +    }
>> +
>> +    return left | (right >> n);
>> +}
> 
> This doesn't correspond to the algorithm presented in the manual.  Thus 
> this requires lots of extra commentary.
> 
> I guess I see how you're trying to process blocks at a time, instead of 
> single bits at a time.  But I don't think the merging of data into 
> "right" and "left" looks right.  I would have expected
> 
>      right = (right << n) | (src & m);
> 
> and similarly for left.
> 
> It doesn't look like that the ctpop at the end is correct, given how 
> mask has been modified.  I would have thought that
> 
>      n = ctpop64(orig_mask);
>      return (left << n) | right;
> 
> would be the correct answer.
> 
> I could be wrong about the above, but that's what the missing commentary 
> should have helped me understand.
> 

It sure worth more comments. Yes, the idea is to process in blocks, and 
we negate the mask to avoid deciding between ctz and cto inside the 
loop. We use rotate instead of shift so it don't change the number of 
zeros and ones, and then we don't need orig_mask.

You'll find my test cases for cfuged and vcfuged on 
https://github.com/PPC64/qemu/blob/ferst-tcg-cfuged/tests/tcg/ppc64le/ . 
I got the same results by running them with this implementation and with 
the Power10 Functional Simulator.

>> +static bool trans_CFUGED(DisasContext *ctx, arg_X *a)
>> +{
>> +    REQUIRE_64BIT(ctx);
>> +    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
>> +#if defined(TARGET_PPC64)
>> +    gen_helper_cfuged(cpu_gpr[a->ra], cpu_gpr[a->rt], cpu_gpr[a->rb]);
>> +#else
>> +    gen_invalid(ctx);
>> +#endif
>> +    return true;
>> +}
> 
> Given that this helper will also be used by vcfuged, there's no point in 
> hiding it in a TARGET_PPC64 block, and thus you can drop the ifdefs.
> 
> 
> r~
> 

If I remove it, the build for ppc will fail, because cpu_gpr is declared 
as TCGv, and the helper uses i64 to match {get,set}_cpu_vsr{l,h}. 
REQUIRE_64BIT makes the helper call unreachable for ppc, but it's a 
runtime check. At build time, the compiler will check the types anyway, 
and give us an error.

-- 
Matheus K. Ferst
Instituto de Pesquisas ELDORADO <http://www.eldorado.org.br/>
Analista de Software Júnior
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>

Re: [PATCH v4 29/31] target/ppc: Implement cfuged instruction
Posted by Richard Henderson 4 years, 9 months ago
On 5/13/21 7:24 AM, Matheus K. Ferst wrote:
>>> +static bool trans_CFUGED(DisasContext *ctx, arg_X *a)
>>> +{
>>> +    REQUIRE_64BIT(ctx);
>>> +    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
>>> +#if defined(TARGET_PPC64)
>>> +    gen_helper_cfuged(cpu_gpr[a->ra], cpu_gpr[a->rt], cpu_gpr[a->rb]);
>>> +#else
>>> +    gen_invalid(ctx);
>>> +#endif
>>> +    return true;
>>> +}
>>
>> Given that this helper will also be used by vcfuged, there's no point in 
>> hiding it in a TARGET_PPC64 block, and thus you can drop the ifdefs.
>>
>>
>> r~
>>
> 
> If I remove it, the build for ppc will fail, because cpu_gpr is declared as 
> TCGv, and the helper uses i64 to match {get,set}_cpu_vsr{l,h}. REQUIRE_64BIT 
> makes the helper call unreachable for ppc, but it's a runtime check. At build 
> time, the compiler will check the types anyway, and give us an error.

Hmm, yes.  Just change the gen_invalid above to qemu_build_not_reached().


r~