From: Mateja Marjanovic <Mateja.Marjanovic@rt-rk.com>
Add emulation of MMI instruction PEXEH. The emulation is implemented
using TCG front end operations directly to achieve better performance.
Signed-off-by: Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
---
target/mips/translate.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 74 insertions(+), 1 deletion(-)
diff --git a/target/mips/translate.c b/target/mips/translate.c
index 64eb10c..01efcde 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -24650,6 +24650,77 @@ static void gen_mmi_pexcw(DisasContext *ctx)
}
}
+/*
+ * PEXEH rd, rt
+ *
+ * Parallel Exchange Even Halfword
+ *
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-----------+---------+---------+---------+---------+-----------+
+ * | MMI |0 0 0 0 0| rt | rd | PEXEH | MMI2 |
+ * +-----------+---------+---------+---------+---------+-----------+
+ */
+
+static void gen_mmi_pexeh(DisasContext *ctx)
+{
+ uint32_t pd, rt, rd;
+ uint32_t opcode;
+
+ opcode = ctx->opcode;
+
+ pd = extract32(opcode, 21, 5);
+ rt = extract32(opcode, 16, 5);
+ rd = extract32(opcode, 11, 5);
+
+ if (unlikely(pd != 0)) {
+ generate_exception_end(ctx, EXCP_RI);
+ } else if (rd == 0) {
+ /* nop */
+ } else if (rt == 0) {
+ tcg_gen_movi_i64(cpu_gpr[rd], 0);
+ tcg_gen_movi_i64(cpu_mmr[rd], 0);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new();
+ TCGv_i64 t1 = tcg_temp_new();
+ uint64_t mask0 = (1ULL << 16) - 1;
+ uint64_t mask1 = mask0 << 16;
+ uint64_t mask2 = mask1 << 16;
+ uint64_t mask3 = mask2 << 16;
+
+ tcg_gen_movi_i64(t1, 0);
+
+ tcg_gen_andi_i64(t0, cpu_gpr[rt], mask0);
+ tcg_gen_shli_i64(t0, t0, 32);
+ tcg_gen_or_i64(t1, t0, t1);
+ tcg_gen_andi_i64(t0, cpu_gpr[rt], mask1);
+ tcg_gen_or_i64(t1, t0, t1);
+ tcg_gen_andi_i64(t0, cpu_gpr[rt], mask2);
+ tcg_gen_shri_i64(t0, t0, 32);
+ tcg_gen_or_i64(t1, t0, t1);
+ tcg_gen_andi_i64(t0, cpu_gpr[rt], mask3);
+ tcg_gen_or_i64(t1, t0, t1);
+
+ tcg_gen_mov_i64(cpu_gpr[rd], t1);
+ tcg_gen_movi_i64(t1, 0);
+
+ tcg_gen_andi_i64(t0, cpu_mmr[rt], mask0);
+ tcg_gen_shli_i64(t0, t0, 32);
+ tcg_gen_or_i64(t1, t0, t1);
+ tcg_gen_andi_i64(t0, cpu_mmr[rt], mask1);
+ tcg_gen_or_i64(t1, t0, t1);
+ tcg_gen_andi_i64(t0, cpu_mmr[rt], mask2);
+ tcg_gen_shri_i64(t0, t0, 32);
+ tcg_gen_or_i64(t1, t0, t1);
+ tcg_gen_andi_i64(t0, cpu_mmr[rt], mask3);
+ tcg_gen_or_i64(t1, t0, t1);
+
+ tcg_gen_mov_i64(cpu_mmr[rd], t1);
+
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+ }
+}
+
#endif
@@ -27670,7 +27741,6 @@ static void decode_mmi2(CPUMIPSState *env, DisasContext *ctx)
case MMI_OPC_2_PXOR: /* TODO: MMI_OPC_2_PXOR */
case MMI_OPC_2_PMSUBH: /* TODO: MMI_OPC_2_PMSUBH */
case MMI_OPC_2_PHMSBH: /* TODO: MMI_OPC_2_PHMSBH */
- case MMI_OPC_2_PEXEH: /* TODO: MMI_OPC_2_PEXEH */
case MMI_OPC_2_PREVH: /* TODO: MMI_OPC_2_PREVH */
case MMI_OPC_2_PMULTH: /* TODO: MMI_OPC_2_PMULTH */
case MMI_OPC_2_PDIVBW: /* TODO: MMI_OPC_2_PDIVBW */
@@ -27681,6 +27751,9 @@ static void decode_mmi2(CPUMIPSState *env, DisasContext *ctx)
case MMI_OPC_2_PCPYLD:
gen_mmi_pcpyld(ctx);
break;
+ case MMI_OPC_2_PEXEH:
+ gen_mmi_pexeh(ctx);
+ break;
default:
MIPS_INVAL("TX79 MMI class MMI2");
generate_exception_end(ctx, EXCP_RI);
--
2.7.4
> From: Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
> Subject: [PATCH v3 06/13] target/mips: Add emulation of MMI instruction PEXEH
>
> From: Mateja Marjanovic <Mateja.Marjanovic@rt-rk.com>
>
> Add emulation of MMI instruction PEXEH. The emulation is implemented
> using TCG front end operations directly to achieve better performance.
>
> Signed-off-by: Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
> ---
> target/mips/translate.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 74 insertions(+), 1 deletion(-)
>
> diff --git a/target/mips/translate.c b/target/mips/translate.c
> index 64eb10c..01efcde 100644
> --- a/target/mips/translate.c
> +++ b/target/mips/translate.c
> @@ -24650,6 +24650,77 @@ static void gen_mmi_pexcw(DisasContext *ctx)
> }
> }
>
> +/*
> + * PEXEH rd, rt
> + *
> + * Parallel Exchange Even Halfword
Indentation...
> + *
> + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
> + * +-----------+---------+---------+---------+---------+-----------+
> + * | MMI |0 0 0 0 0| rt | rd | PEXEH | MMI2 |
> + * +-----------+---------+---------+---------+---------+-----------+
> + */
> +
> +static void gen_mmi_pexeh(DisasContext *ctx)
> +{
> + uint32_t pd, rt, rd;
> + uint32_t opcode;
> +
> + opcode = ctx->opcode;
> +
> + pd = extract32(opcode, 21, 5);
> + rt = extract32(opcode, 16, 5);
> + rd = extract32(opcode, 11, 5);
> +
> + if (unlikely(pd != 0)) {
> + generate_exception_end(ctx, EXCP_RI);
> + } else if (rd == 0) {
> + /* nop */
> + } else if (rt == 0) {
> + tcg_gen_movi_i64(cpu_gpr[rd], 0);
> + tcg_gen_movi_i64(cpu_mmr[rd], 0);
> + } else {
> + TCGv_i64 t0 = tcg_temp_new();
> + TCGv_i64 t1 = tcg_temp_new();
> + uint64_t mask0 = (1ULL << 16) - 1;
> + uint64_t mask1 = mask0 << 16;
> + uint64_t mask2 = mask1 << 16;
> + uint64_t mask3 = mask2 << 16;
What about:
uint64_t mask2 = mask0 << 32;
uint64_t mask3 = mask0 << 48;
> +
> + tcg_gen_movi_i64(t1, 0);
> +
The last blank line should be deleted IMO.
> + tcg_gen_andi_i64(t0, cpu_gpr[rt], mask0);
> + tcg_gen_shli_i64(t0, t0, 32);
> + tcg_gen_or_i64(t1, t0, t1);
> + tcg_gen_andi_i64(t0, cpu_gpr[rt], mask1);
> + tcg_gen_or_i64(t1, t0, t1);
> + tcg_gen_andi_i64(t0, cpu_gpr[rt], mask2);
> + tcg_gen_shri_i64(t0, t0, 32);
> + tcg_gen_or_i64(t1, t0, t1);
> + tcg_gen_andi_i64(t0, cpu_gpr[rt], mask3);
> + tcg_gen_or_i64(t1, t0, t1);
> +
> + tcg_gen_mov_i64(cpu_gpr[rd], t1);
> + tcg_gen_movi_i64(t1, 0);
> +
> + tcg_gen_andi_i64(t0, cpu_mmr[rt], mask0);
Better (more logical and consistent with other patches) spacing is like
this:
tcg_gen_mov_i64(cpu_gpr[rd], t1);
tcg_gen_movi_i64(t1, 0);
tcg_gen_andi_i64(t0, cpu_mmr[rt], mask0);
> + tcg_gen_shli_i64(t0, t0, 32);
> + tcg_gen_or_i64(t1, t0, t1);
> + tcg_gen_andi_i64(t0, cpu_mmr[rt], mask1);
> + tcg_gen_or_i64(t1, t0, t1);
> + tcg_gen_andi_i64(t0, cpu_mmr[rt], mask2);
> + tcg_gen_shri_i64(t0, t0, 32);
> + tcg_gen_or_i64(t1, t0, t1);
> + tcg_gen_andi_i64(t0, cpu_mmr[rt], mask3);
> + tcg_gen_or_i64(t1, t0, t1);
> +
> + tcg_gen_mov_i64(cpu_mmr[rd], t1);
> +
> + tcg_temp_free(t0);
> + tcg_temp_free(t1);
> + }
I think that if rt == rd this whole block can be rewritten in a more optimal
and clearer way:
tcg_gen_andi_i64(t0, cpu_gpr[rt], mask0);
tcg_gen_shli_i64(t0, t0, 32);
tcg_gen_andi_i64(t1, cpu_gpr[rt], mask2);
tcg_gen_shri_i64(t1, t1, 32);
tcg_gen_andi_i64(cpu_gpr[rd], cpu_gpr[rt], mask3 | mask1);
tcg_gen_or_i64(cpu_gpr[rd], cpu_gpr[rd], t0);
tcg_gen_or_i64(cpu_gpr[rd], cpu_gpr[rd], t1);
and the same for "mmr" half.
Aleksandar
> +}
> +
> #endif
>
>
> @@ -27670,7 +27741,6 @@ static void decode_mmi2(CPUMIPSState *env, DisasContext *ctx)
> case MMI_OPC_2_PXOR: /* TODO: MMI_OPC_2_PXOR */
> case MMI_OPC_2_PMSUBH: /* TODO: MMI_OPC_2_PMSUBH */
> case MMI_OPC_2_PHMSBH: /* TODO: MMI_OPC_2_PHMSBH */
> - case MMI_OPC_2_PEXEH: /* TODO: MMI_OPC_2_PEXEH */
> case MMI_OPC_2_PREVH: /* TODO: MMI_OPC_2_PREVH */
> case MMI_OPC_2_PMULTH: /* TODO: MMI_OPC_2_PMULTH */
> case MMI_OPC_2_PDIVBW: /* TODO: MMI_OPC_2_PDIVBW */
> @@ -27681,6 +27751,9 @@ static void decode_mmi2(CPUMIPSState *env, DisasContext *ctx)
> case MMI_OPC_2_PCPYLD:
> gen_mmi_pcpyld(ctx);
> break;
> + case MMI_OPC_2_PEXEH:
> + gen_mmi_pexeh(ctx);
> + break;
> default:
> MIPS_INVAL("TX79 MMI class MMI2");
> generate_exception_end(ctx, EXCP_RI);
> --
> 2.7.4
>
>
On 3/4/19 10:27 AM, Aleksandar Markovic wrote:
>> From: Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
>> Subject: [PATCH v3 06/13] target/mips: Add emulation of MMI instruction PEXEH
>>
>> From: Mateja Marjanovic <Mateja.Marjanovic@rt-rk.com>
>>
>> Add emulation of MMI instruction PEXEH. The emulation is implemented
>> using TCG front end operations directly to achieve better performance.
>>
>> Signed-off-by: Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
>> ---
>> target/mips/translate.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++-
>> 1 file changed, 74 insertions(+), 1 deletion(-)
>>
>> diff --git a/target/mips/translate.c b/target/mips/translate.c
>> index 64eb10c..01efcde 100644
>> --- a/target/mips/translate.c
>> +++ b/target/mips/translate.c
>> @@ -24650,6 +24650,77 @@ static void gen_mmi_pexcw(DisasContext *ctx)
>> }
>> }
>>
>> +/*
>> + * PEXEH rd, rt
>> + *
>> + * Parallel Exchange Even Halfword
>
> Indentation...
>
>> + *
>> + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
>> + * +-----------+---------+---------+---------+---------+-----------+
>> + * | MMI |0 0 0 0 0| rt | rd | PEXEH | MMI2 |
>> + * +-----------+---------+---------+---------+---------+-----------+
>> + */
>> +
>> +static void gen_mmi_pexeh(DisasContext *ctx)
>> +{
>> + uint32_t pd, rt, rd;
>> + uint32_t opcode;
>> +
>> + opcode = ctx->opcode;
>> +
>> + pd = extract32(opcode, 21, 5);
>> + rt = extract32(opcode, 16, 5);
>> + rd = extract32(opcode, 11, 5);
>> +
>> + if (unlikely(pd != 0)) {
>> + generate_exception_end(ctx, EXCP_RI);
>> + } else if (rd == 0) {
>> + /* nop */
>> + } else if (rt == 0) {
>> + tcg_gen_movi_i64(cpu_gpr[rd], 0);
>> + tcg_gen_movi_i64(cpu_mmr[rd], 0);
>> + } else {
>> + TCGv_i64 t0 = tcg_temp_new();
>> + TCGv_i64 t1 = tcg_temp_new();
>> + uint64_t mask0 = (1ULL << 16) - 1;
>> + uint64_t mask1 = mask0 << 16;
>> + uint64_t mask2 = mask1 << 16;
>> + uint64_t mask3 = mask2 << 16;
>
> What about:
>
> uint64_t mask2 = mask0 << 32;
> uint64_t mask3 = mask0 << 48;
>
>> +
>> + tcg_gen_movi_i64(t1, 0);
>> +
>
> The last blank line should be deleted IMO.
>
>> + tcg_gen_andi_i64(t0, cpu_gpr[rt], mask0);
>> + tcg_gen_shli_i64(t0, t0, 32);
>> + tcg_gen_or_i64(t1, t0, t1);
>> + tcg_gen_andi_i64(t0, cpu_gpr[rt], mask1);
>> + tcg_gen_or_i64(t1, t0, t1);
>> + tcg_gen_andi_i64(t0, cpu_gpr[rt], mask2);
>> + tcg_gen_shri_i64(t0, t0, 32);
>> + tcg_gen_or_i64(t1, t0, t1);
>> + tcg_gen_andi_i64(t0, cpu_gpr[rt], mask3);
>> + tcg_gen_or_i64(t1, t0, t1);
>> +
>> + tcg_gen_mov_i64(cpu_gpr[rd], t1);
>> + tcg_gen_movi_i64(t1, 0);
>> +
>> + tcg_gen_andi_i64(t0, cpu_mmr[rt], mask0);
>
> Better (more logical and consistent with other patches) spacing is like
> this:
>
> tcg_gen_mov_i64(cpu_gpr[rd], t1);
>
> tcg_gen_movi_i64(t1, 0);
> tcg_gen_andi_i64(t0, cpu_mmr[rt], mask0);
>
>> + tcg_gen_shli_i64(t0, t0, 32);
>> + tcg_gen_or_i64(t1, t0, t1);
>> + tcg_gen_andi_i64(t0, cpu_mmr[rt], mask1);
>> + tcg_gen_or_i64(t1, t0, t1);
>> + tcg_gen_andi_i64(t0, cpu_mmr[rt], mask2);
>> + tcg_gen_shri_i64(t0, t0, 32);
>> + tcg_gen_or_i64(t1, t0, t1);
>> + tcg_gen_andi_i64(t0, cpu_mmr[rt], mask3);
>> + tcg_gen_or_i64(t1, t0, t1);
>> +
>> + tcg_gen_mov_i64(cpu_mmr[rd], t1);
>> +
>> + tcg_temp_free(t0);
>> + tcg_temp_free(t1);
>> + }
>
> I think that if rt == rd this whole block can be rewritten in a more optimal
> and clearer way:
>
> tcg_gen_andi_i64(t0, cpu_gpr[rt], mask0);
> tcg_gen_shli_i64(t0, t0, 32);
> tcg_gen_andi_i64(t1, cpu_gpr[rt], mask2);
> tcg_gen_shri_i64(t1, t1, 32);
>
> tcg_gen_andi_i64(cpu_gpr[rd], mask3 || mask1);
> tcg_gen_or_i64(cpu_gpr[rd], t0);
> tcg_gen_or_i64(cpu_gpr[rd], t1);
>
> and the same for "mmr" half.
You don't need rt == rd in order to write it that way:
tcg_gen_andi_i64(cpu_gpr[rd], cpu_gpr[rt], mask3 | mask1);
And yes, that's much better.
r~
© 2016 - 2025 Red Hat, Inc.