[Qemu-devel] [PATCH v3 06/13] target/mips: Add emulation of MMI instruction PEXEH

Mateja Marjanovic posted 13 patches 6 years, 8 months ago
Maintainers: Aleksandar Rikalo <arikalo@wavecomp.com>, Aurelien Jarno <aurelien@aurel32.net>, Aleksandar Markovic <amarkovic@wavecomp.com>
[Qemu-devel] [PATCH v3 06/13] target/mips: Add emulation of MMI instruction PEXEH
Posted by Mateja Marjanovic 6 years, 8 months ago
From: Mateja Marjanovic <Mateja.Marjanovic@rt-rk.com>

Add emulation of MMI instruction PEXEH. The emulation is implemented
using TCG front end operations directly to achieve better performance.

Signed-off-by: Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
---
 target/mips/translate.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 74 insertions(+), 1 deletion(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 64eb10c..01efcde 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -24650,6 +24650,77 @@ static void gen_mmi_pexcw(DisasContext *ctx)
     }
 }
 
+/*
+ *  PEXEH rd, rt
+ *
+ *  Parallel Exchange Even Halfword
+ *
+ *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ *  +-----------+---------+---------+---------+---------+-----------+
+ *  |    MMI    |0 0 0 0 0|   rt    |   rd    |  PEXEH  |    MMI2   |
+ *  +-----------+---------+---------+---------+---------+-----------+
+ */
+
+static void gen_mmi_pexeh(DisasContext *ctx)
+{
+    uint32_t pd, rt, rd;
+    uint32_t opcode;
+
+    opcode = ctx->opcode;
+
+    pd = extract32(opcode, 21, 5);
+    rt = extract32(opcode, 16, 5);
+    rd = extract32(opcode, 11, 5);
+
+    if (unlikely(pd != 0)) {
+        generate_exception_end(ctx, EXCP_RI);
+    } else if (rd == 0) {
+        /* nop */
+    } else if (rt == 0) {
+        tcg_gen_movi_i64(cpu_gpr[rd], 0);
+        tcg_gen_movi_i64(cpu_mmr[rd], 0);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new();
+        TCGv_i64 t1 = tcg_temp_new();
+        uint64_t mask0 = (1ULL << 16) - 1;
+        uint64_t mask1 = mask0 << 16;
+        uint64_t mask2 = mask1 << 16;
+        uint64_t mask3 = mask2 << 16;
+
+        tcg_gen_movi_i64(t1, 0);
+
+        tcg_gen_andi_i64(t0, cpu_gpr[rt], mask0);
+        tcg_gen_shli_i64(t0, t0, 32);
+        tcg_gen_or_i64(t1, t0, t1);
+        tcg_gen_andi_i64(t0, cpu_gpr[rt], mask1);
+        tcg_gen_or_i64(t1, t0, t1);
+        tcg_gen_andi_i64(t0, cpu_gpr[rt], mask2);
+        tcg_gen_shri_i64(t0, t0, 32);
+        tcg_gen_or_i64(t1, t0, t1);
+        tcg_gen_andi_i64(t0, cpu_gpr[rt], mask3);
+        tcg_gen_or_i64(t1, t0, t1);
+
+        tcg_gen_mov_i64(cpu_gpr[rd], t1);
+        tcg_gen_movi_i64(t1, 0);
+
+        tcg_gen_andi_i64(t0, cpu_mmr[rt], mask0);
+        tcg_gen_shli_i64(t0, t0, 32);
+        tcg_gen_or_i64(t1, t0, t1);
+        tcg_gen_andi_i64(t0, cpu_mmr[rt], mask1);
+        tcg_gen_or_i64(t1, t0, t1);
+        tcg_gen_andi_i64(t0, cpu_mmr[rt], mask2);
+        tcg_gen_shri_i64(t0, t0, 32);
+        tcg_gen_or_i64(t1, t0, t1);
+        tcg_gen_andi_i64(t0, cpu_mmr[rt], mask3);
+        tcg_gen_or_i64(t1, t0, t1);
+
+        tcg_gen_mov_i64(cpu_mmr[rd], t1);
+
+        tcg_temp_free(t0);
+        tcg_temp_free(t1);
+    }
+}
+
 #endif
 
 
@@ -27670,7 +27741,6 @@ static void decode_mmi2(CPUMIPSState *env, DisasContext *ctx)
     case MMI_OPC_2_PXOR:      /* TODO: MMI_OPC_2_PXOR */
     case MMI_OPC_2_PMSUBH:    /* TODO: MMI_OPC_2_PMSUBH */
     case MMI_OPC_2_PHMSBH:    /* TODO: MMI_OPC_2_PHMSBH */
-    case MMI_OPC_2_PEXEH:     /* TODO: MMI_OPC_2_PEXEH */
     case MMI_OPC_2_PREVH:     /* TODO: MMI_OPC_2_PREVH */
     case MMI_OPC_2_PMULTH:    /* TODO: MMI_OPC_2_PMULTH */
     case MMI_OPC_2_PDIVBW:    /* TODO: MMI_OPC_2_PDIVBW */
@@ -27681,6 +27751,9 @@ static void decode_mmi2(CPUMIPSState *env, DisasContext *ctx)
     case MMI_OPC_2_PCPYLD:
         gen_mmi_pcpyld(ctx);
         break;
+    case MMI_OPC_2_PEXEH:
+        gen_mmi_pexeh(ctx);
+        break;
     default:
         MIPS_INVAL("TX79 MMI class MMI2");
         generate_exception_end(ctx, EXCP_RI);
-- 
2.7.4


Re: [Qemu-devel] [PATCH v3 06/13] target/mips: Add emulation of MMI instruction PEXEH
Posted by Aleksandar Markovic 6 years, 8 months ago
> From: Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
> Subject: [PATCH v3 06/13] target/mips: Add emulation of MMI instruction PEXEH
> 
> From: Mateja Marjanovic <Mateja.Marjanovic@rt-rk.com>
> 
> Add emulation of MMI instruction PEXEH. The emulation is implemented
> using TCG front end operations directly to achieve better performance.
> 
> Signed-off-by: Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
> ---
>  target/mips/translate.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 74 insertions(+), 1 deletion(-)
> 
> diff --git a/target/mips/translate.c b/target/mips/translate.c
> index 64eb10c..01efcde 100644
> --- a/target/mips/translate.c
> +++ b/target/mips/translate.c
> @@ -24650,6 +24650,77 @@ static void gen_mmi_pexcw(DisasContext *ctx)
>      }
>  }
> 
> +/*
> + *  PEXEH rd, rt
> + *
> + *  Parallel Exchange Even Halfword

Indentation...

> + *
> + *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
> + *  +-----------+---------+---------+---------+---------+-----------+
> + *  |    MMI    |0 0 0 0 0|   rt    |   rd    |  PEXEH  |    MMI2   |
> + *  +-----------+---------+---------+---------+---------+-----------+
> + */
> +
> +static void gen_mmi_pexeh(DisasContext *ctx)
> +{
> +    uint32_t pd, rt, rd;
> +    uint32_t opcode;
> +
> +    opcode = ctx->opcode;
> +
> +    pd = extract32(opcode, 21, 5);
> +    rt = extract32(opcode, 16, 5);
> +    rd = extract32(opcode, 11, 5);
> +
> +    if (unlikely(pd != 0)) {
> +        generate_exception_end(ctx, EXCP_RI);
> +    } else if (rd == 0) {
> +        /* nop */
> +    } else if (rt == 0) {
> +        tcg_gen_movi_i64(cpu_gpr[rd], 0);
> +        tcg_gen_movi_i64(cpu_mmr[rd], 0);
> +    } else {
> +        TCGv_i64 t0 = tcg_temp_new();
> +        TCGv_i64 t1 = tcg_temp_new();
> +        uint64_t mask0 = (1ULL << 16) - 1;
> +        uint64_t mask1 = mask0 << 16;
> +        uint64_t mask2 = mask1 << 16;
> +        uint64_t mask3 = mask2 << 16;

What about:

       uint64_t mask2 = mask0 << 32;
       uint64_t mask3 = mask0 << 48;

> +
> +        tcg_gen_movi_i64(t1, 0);
> +

The last blank line should be deleted IMO.

> +        tcg_gen_andi_i64(t0, cpu_gpr[rt], mask0);
> +        tcg_gen_shli_i64(t0, t0, 32);
> +        tcg_gen_or_i64(t1, t0, t1);
> +        tcg_gen_andi_i64(t0, cpu_gpr[rt], mask1);
> +        tcg_gen_or_i64(t1, t0, t1);
> +        tcg_gen_andi_i64(t0, cpu_gpr[rt], mask2);
> +        tcg_gen_shri_i64(t0, t0, 32);
> +        tcg_gen_or_i64(t1, t0, t1);
> +        tcg_gen_andi_i64(t0, cpu_gpr[rt], mask3);
> +        tcg_gen_or_i64(t1, t0, t1);
> +
> +        tcg_gen_mov_i64(cpu_gpr[rd], t1);
> +        tcg_gen_movi_i64(t1, 0);
> +
> +        tcg_gen_andi_i64(t0, cpu_mmr[rt], mask0);

Better (more logical nad consistent with other patches) spacing is like
this:

        tcg_gen_mov_i64(cpu_gpr[rd], t1);

        tcg_gen_movi_i64(t1, 0);
        tcg_gen_andi_i64(t0, cpu_mmr[rt], mask0);

> +        tcg_gen_shli_i64(t0, t0, 32);
> +        tcg_gen_or_i64(t1, t0, t1);
> +        tcg_gen_andi_i64(t0, cpu_mmr[rt], mask1);
> +        tcg_gen_or_i64(t1, t0, t1);
> +        tcg_gen_andi_i64(t0, cpu_mmr[rt], mask2);
> +        tcg_gen_shri_i64(t0, t0, 32);
> +        tcg_gen_or_i64(t1, t0, t1);
> +        tcg_gen_andi_i64(t0, cpu_mmr[rt], mask3);
> +        tcg_gen_or_i64(t1, t0, t1);
> +
> +        tcg_gen_mov_i64(cpu_mmr[rd], t1);
> +
> +        tcg_temp_free(t0);
> +        tcg_temp_free(t1);
> +    }

I think that if rt == rd this whole block can be rewritten in a more optimal
and clearer way:

        tcg_gen_andi_i64(t0, cpu_gpr[rt], mask0);
        tcg_gen_shli_i64(t0, t0, 32);
        tcg_gen_andi_i64(t1, cpu_gpr[rt], mask2);
        tcg_gen_shri_i64(t1, t1, 32);

        tcg_gen_andi_i64(cpu_gpr[rd], mask3 || mask1);
        tcg_gen_or_i64(cpu_gpr[rd], t0);
        tcg_gen_or_i64(cpu_gpr[rd], t1);

and the same for "mmr" half.

Aleksandar

> +}
> +
>  #endif
> 
> 
> @@ -27670,7 +27741,6 @@ static void decode_mmi2(CPUMIPSState *env, DisasContext *ctx)
>      case MMI_OPC_2_PXOR:      /* TODO: MMI_OPC_2_PXOR */
>      case MMI_OPC_2_PMSUBH:    /* TODO: MMI_OPC_2_PMSUBH */
>      case MMI_OPC_2_PHMSBH:    /* TODO: MMI_OPC_2_PHMSBH */
> -    case MMI_OPC_2_PEXEH:     /* TODO: MMI_OPC_2_PEXEH */
>      case MMI_OPC_2_PREVH:     /* TODO: MMI_OPC_2_PREVH */
>      case MMI_OPC_2_PMULTH:    /* TODO: MMI_OPC_2_PMULTH */
>      case MMI_OPC_2_PDIVBW:    /* TODO: MMI_OPC_2_PDIVBW */
> @@ -27681,6 +27751,9 @@ static void decode_mmi2(CPUMIPSState *env, DisasContext *ctx)
>      case MMI_OPC_2_PCPYLD:
>          gen_mmi_pcpyld(ctx);
>          break;
> +    case MMI_OPC_2_PEXEH:
> +        gen_mmi_pexeh(ctx);
> +        break;
>      default:
>          MIPS_INVAL("TX79 MMI class MMI2");
>          generate_exception_end(ctx, EXCP_RI);
> --
> 2.7.4
> 
> 
Re: [Qemu-devel] [PATCH v3 06/13] target/mips: Add emulation of MMI instruction PEXEH
Posted by Richard Henderson 6 years, 8 months ago
On 3/4/19 10:27 AM, Aleksandar Markovic wrote:
>> From: Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
>> Subject: [PATCH v3 06/13] target/mips: Add emulation of MMI instruction PEXEH
>>
>> From: Mateja Marjanovic <Mateja.Marjanovic@rt-rk.com>
>>
>> Add emulation of MMI instruction PEXEH. The emulation is implemented
>> using TCG front end operations directly to achieve better performance.
>>
>> Signed-off-by: Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
>> ---
>>  target/mips/translate.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 74 insertions(+), 1 deletion(-)
>>
>> diff --git a/target/mips/translate.c b/target/mips/translate.c
>> index 64eb10c..01efcde 100644
>> --- a/target/mips/translate.c
>> +++ b/target/mips/translate.c
>> @@ -24650,6 +24650,77 @@ static void gen_mmi_pexcw(DisasContext *ctx)
>>      }
>>  }
>>
>> +/*
>> + *  PEXEH rd, rt
>> + *
>> + *  Parallel Exchange Even Halfword
> 
> Indentation...
> 
>> + *
>> + *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
>> + *  +-----------+---------+---------+---------+---------+-----------+
>> + *  |    MMI    |0 0 0 0 0|   rt    |   rd    |  PEXEH  |    MMI2   |
>> + *  +-----------+---------+---------+---------+---------+-----------+
>> + */
>> +
>> +static void gen_mmi_pexeh(DisasContext *ctx)
>> +{
>> +    uint32_t pd, rt, rd;
>> +    uint32_t opcode;
>> +
>> +    opcode = ctx->opcode;
>> +
>> +    pd = extract32(opcode, 21, 5);
>> +    rt = extract32(opcode, 16, 5);
>> +    rd = extract32(opcode, 11, 5);
>> +
>> +    if (unlikely(pd != 0)) {
>> +        generate_exception_end(ctx, EXCP_RI);
>> +    } else if (rd == 0) {
>> +        /* nop */
>> +    } else if (rt == 0) {
>> +        tcg_gen_movi_i64(cpu_gpr[rd], 0);
>> +        tcg_gen_movi_i64(cpu_mmr[rd], 0);
>> +    } else {
>> +        TCGv_i64 t0 = tcg_temp_new();
>> +        TCGv_i64 t1 = tcg_temp_new();
>> +        uint64_t mask0 = (1ULL << 16) - 1;
>> +        uint64_t mask1 = mask0 << 16;
>> +        uint64_t mask2 = mask1 << 16;
>> +        uint64_t mask3 = mask2 << 16;
> 
> What about:
> 
>        uint64_t mask2 = mask0 << 32;
>        uint64_t mask3 = mask0 << 48;
> 
>> +
>> +        tcg_gen_movi_i64(t1, 0);
>> +
> 
> The last blank line should be deleted IMO.
> 
>> +        tcg_gen_andi_i64(t0, cpu_gpr[rt], mask0);
>> +        tcg_gen_shli_i64(t0, t0, 32);
>> +        tcg_gen_or_i64(t1, t0, t1);
>> +        tcg_gen_andi_i64(t0, cpu_gpr[rt], mask1);
>> +        tcg_gen_or_i64(t1, t0, t1);
>> +        tcg_gen_andi_i64(t0, cpu_gpr[rt], mask2);
>> +        tcg_gen_shri_i64(t0, t0, 32);
>> +        tcg_gen_or_i64(t1, t0, t1);
>> +        tcg_gen_andi_i64(t0, cpu_gpr[rt], mask3);
>> +        tcg_gen_or_i64(t1, t0, t1);
>> +
>> +        tcg_gen_mov_i64(cpu_gpr[rd], t1);
>> +        tcg_gen_movi_i64(t1, 0);
>> +
>> +        tcg_gen_andi_i64(t0, cpu_mmr[rt], mask0);
> 
> Better (more logical nad consistent with other patches) spacing is like
> this:
> 
>         tcg_gen_mov_i64(cpu_gpr[rd], t1);
> 
>         tcg_gen_movi_i64(t1, 0);
>         tcg_gen_andi_i64(t0, cpu_mmr[rt], mask0);
> 
>> +        tcg_gen_shli_i64(t0, t0, 32);
>> +        tcg_gen_or_i64(t1, t0, t1);
>> +        tcg_gen_andi_i64(t0, cpu_mmr[rt], mask1);
>> +        tcg_gen_or_i64(t1, t0, t1);
>> +        tcg_gen_andi_i64(t0, cpu_mmr[rt], mask2);
>> +        tcg_gen_shri_i64(t0, t0, 32);
>> +        tcg_gen_or_i64(t1, t0, t1);
>> +        tcg_gen_andi_i64(t0, cpu_mmr[rt], mask3);
>> +        tcg_gen_or_i64(t1, t0, t1);
>> +
>> +        tcg_gen_mov_i64(cpu_mmr[rd], t1);
>> +
>> +        tcg_temp_free(t0);
>> +        tcg_temp_free(t1);
>> +    }
> 
> I think that if rt == rd this whole block can be rewritten in a more optimal
> and clearer way:
> 
>         tcg_gen_andi_i64(t0, cpu_gpr[rt], mask0);
>         tcg_gen_shli_i64(t0, t0, 32);
>         tcg_gen_andi_i64(t1, cpu_gpr[rt], mask2);
>         tcg_gen_shri_i64(t1, t1, 32);
> 
>         tcg_gen_andi_i64(cpu_gpr[rd], mask3 || mask1);
>         tcg_gen_or_i64(cpu_gpr[rd], t0);
>         tcg_gen_or_i64(cpu_gpr[rd], t1);
> 
> and the same for "mmr" half.

You don't need rt == rd in order to write it that way:

  tcg_gen_andi_i64(cpu_gpr[rd], cpu_gpr[rt], mask3 | mask1);

And yes, that's much better.


r~