Adds support for emulating the D16MAC instruction.
Signed-off-by: Craig Janeczek <jancraig@amazon.com>
---
v1
- initial patch
v2
- changed bitfield usage to extract32
- used sextract_tl instructions instead of shift and ext
v3
- Split gen_mxu function into command specific gen_mxu_<ins> functions
target/mips/translate.c | 78 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 78 insertions(+)
diff --git a/target/mips/translate.c b/target/mips/translate.c
index f693e45203..7c17867d30 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -3967,6 +3967,80 @@ static void gen_mxu_d16mul(DisasContext *ctx, uint32_t opc)
tcg_temp_free(t3);
}
+/* D16MAC XRa, XRb, XRc, XRd, APTN2, OPTN2
+ * Signed 16 bit pattern multiply and accumulate */
+static void gen_mxu_d16mac(DisasContext *ctx, uint32_t opc)
+{
+ TCGv t0, t1, t2, t3;
+ uint32_t xra, xrb, xrc, xrd, optn2, aptn2;
+
+ t0 = tcg_temp_new();
+ t1 = tcg_temp_new();
+ t2 = tcg_temp_new();
+ t3 = tcg_temp_new();
+
+ xra = extract32(ctx->opcode, 6, 4);
+ xrb = extract32(ctx->opcode, 10, 4);
+ xrc = extract32(ctx->opcode, 14, 4);
+ xrd = extract32(ctx->opcode, 18, 4);
+ optn2 = extract32(ctx->opcode, 22, 2);
+ aptn2 = extract32(ctx->opcode, 24, 2);
+
+ gen_load_mxu_gpr(t1, xrb);
+ tcg_gen_sextract_tl(t0, t1, 0, 16);
+ tcg_gen_sextract_tl(t1, t1, 16, 16);
+ gen_load_mxu_gpr(t3, xrc);
+ tcg_gen_sextract_tl(t2, t3, 0, 16);
+ tcg_gen_sextract_tl(t3, t3, 16, 16);
+
+ switch (optn2) {
+ case 0: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
+ tcg_gen_mul_tl(t3, t1, t3);
+ tcg_gen_mul_tl(t2, t0, t2);
+ break;
+ case 1: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
+ tcg_gen_mul_tl(t3, t0, t3);
+ tcg_gen_mul_tl(t2, t0, t2);
+ break;
+ case 2: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
+ tcg_gen_mul_tl(t3, t1, t3);
+ tcg_gen_mul_tl(t2, t1, t2);
+ break;
+ case 3: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
+ tcg_gen_mul_tl(t3, t0, t3);
+ tcg_gen_mul_tl(t2, t1, t2);
+ break;
+ }
+ gen_load_mxu_gpr(t0, xra);
+ gen_load_mxu_gpr(t1, xrd);
+
+ switch (aptn2) {
+ case 0:
+ tcg_gen_add_tl(t3, t0, t3);
+ tcg_gen_add_tl(t2, t1, t2);
+ break;
+ case 1:
+ tcg_gen_add_tl(t3, t0, t3);
+ tcg_gen_sub_tl(t2, t1, t2);
+ break;
+ case 2:
+ tcg_gen_sub_tl(t3, t0, t3);
+ tcg_gen_add_tl(t2, t1, t2);
+ break;
+ case 3:
+ tcg_gen_sub_tl(t3, t0, t3);
+ tcg_gen_sub_tl(t2, t1, t2);
+ break;
+ }
+ gen_store_mxu_gpr(t3, xra);
+ gen_store_mxu_gpr(t2, xrd);
+
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+ tcg_temp_free(t2);
+ tcg_temp_free(t3);
+}
+
/* Godson integer instructions */
static void gen_loongson_integer(DisasContext *ctx, uint32_t opc,
int rd, int rs, int rt)
@@ -18080,6 +18154,10 @@ static void decode_opc_special2_legacy(CPUMIPSState *env, DisasContext *ctx)
gen_mxu_d16mul(ctx, op1);
break;
+ case OPC_MXU_D16MAC:
+ gen_mxu_d16mac(ctx, op1);
+ break;
+
case OPC_CLO:
case OPC_CLZ:
check_insn(ctx, ISA_MIPS32);
--
2.18.0
> From: Craig Janeczek <jancraig@amazon.com>
> Sent: Tuesday, August 28, 2018 3:00 PM
> To: qemu-devel@nongnu.org
> Cc: Aleksandar Markovic; aurelien@aurel32.net; Craig Janeczek
> Subject: [PATCH v3 6/8] target/mips: Add MXU instruction D16MAC
>
> Adds support for emulating the D16MAC instruction.
>
> Signed-off-by: Craig Janeczek <jancraig@amazon.com>
> ---
> v1
> - initial patch
> v2
> - changed bitfield usage to extract32
> - used sextract_tl instructions instead of shift and ext
> v3
> - Split gen_mxu function into command specific gen_mxu_<ins> functions
>
> target/mips/translate.c | 78 +++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 78 insertions(+)
>
> diff --git a/target/mips/translate.c b/target/mips/translate.c
> index f693e45203..7c17867d30 100644
> --- a/target/mips/translate.c
> +++ b/target/mips/translate.c
> @@ -3967,6 +3967,80 @@ static void gen_mxu_d16mul(DisasContext *ctx, uint32_t opc)
> tcg_temp_free(t3);
> }
>
> +/* D16MAC XRa, XRb, XRc, XRd, APTN2, OPTN2
> + * Signed 16 bit pattern multiply and accumulate */
In QEMU, for all new code, the multiline comment should be in this format:
/*
* This is a
* multiline comment.
*/
Sometimes it is allowed to write this:
/*
* Single line comment in multiline format.
*/
"Classic" comment format is reserved just for such single-line cases:
/* This is a classic way to write a comment. */
> +static void gen_mxu_d16mac(DisasContext *ctx, uint32_t opc)
> +{
> + TCGv t0, t1, t2, t3;
> + uint32_t xra, xrb, xrc, xrd, optn2, aptn2;
> +
> + t0 = tcg_temp_new();
> + t1 = tcg_temp_new();
> + t2 = tcg_temp_new();
> + t3 = tcg_temp_new();
> +
> + xra = extract32(ctx->opcode, 6, 4);
> + xrb = extract32(ctx->opcode, 10, 4);
> + xrc = extract32(ctx->opcode, 14, 4);
> + xrd = extract32(ctx->opcode, 18, 4);
> + optn2 = extract32(ctx->opcode, 22, 2);
> + aptn2 = extract32(ctx->opcode, 24, 2);
> +
> + gen_load_mxu_gpr(t1, xrb);
> + tcg_gen_sextract_tl(t0, t1, 0, 16);
> + tcg_gen_sextract_tl(t1, t1, 16, 16);
> + gen_load_mxu_gpr(t3, xrc);
> + tcg_gen_sextract_tl(t2, t3, 0, 16);
> + tcg_gen_sextract_tl(t3, t3, 16, 16);
> +
> + switch (optn2) {
> + case 0: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
> + tcg_gen_mul_tl(t3, t1, t3);
> + tcg_gen_mul_tl(t2, t0, t2);
> + break;
> + case 1: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
> + tcg_gen_mul_tl(t3, t0, t3);
> + tcg_gen_mul_tl(t2, t0, t2);
> + break;
> + case 2: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
> + tcg_gen_mul_tl(t3, t1, t3);
> + tcg_gen_mul_tl(t2, t1, t2);
> + break;
> + case 3: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
> + tcg_gen_mul_tl(t3, t0, t3);
> + tcg_gen_mul_tl(t2, t1, t2);
> + break;
> + }
Somewhere above all gen_mxu_XXX() functions, the following code segment should be inserted:
/* MXU operand getting patterns */
#define MXU_OPTN2_WW 0
#define MXU_OPTN2_LW 1
#define MXU_OPTN2_HW 2
#define MXU_OPTN2_XW 3
... and these constants should be used in the switch statement just above.
> + gen_load_mxu_gpr(t0, xra);
> + gen_load_mxu_gpr(t1, xrd);
> +
> + switch (aptn2) {
> + case 0:
> + tcg_gen_add_tl(t3, t0, t3);
> + tcg_gen_add_tl(t2, t1, t2);
> + break;
> + case 1:
> + tcg_gen_add_tl(t3, t0, t3);
> + tcg_gen_sub_tl(t2, t1, t2);
> + break;
> + case 2:
> + tcg_gen_sub_tl(t3, t0, t3);
> + tcg_gen_add_tl(t2, t1, t2);
> + break;
> + case 3:
> + tcg_gen_sub_tl(t3, t0, t3);
> + tcg_gen_sub_tl(t2, t1, t2);
> + break;
> + }
Somewhere above all gen_mxu_XXX() functions, the following code segment should be inserted:
/* MXU accumulate patterns */
#define MXU_APTN2_AA 0
#define MXU_APTN2_AS 1
#define MXU_APTN2_SA 2
#define MXU_APTN2_SS 3
... and these constants should be used in the switch statement just above.
> + gen_store_mxu_gpr(t3, xra);
> + gen_store_mxu_gpr(t2, xrd);
> +
> + tcg_temp_free(t0);
> + tcg_temp_free(t1);
> + tcg_temp_free(t2);
> + tcg_temp_free(t3);
> +}
> +
> /* Godson integer instructions */
> static void gen_loongson_integer(DisasContext *ctx, uint32_t opc,
> int rd, int rs, int rt)
> @@ -18080,6 +18154,10 @@ static void decode_opc_special2_legacy(CPUMIPSState *env, DisasContext *ctx)
> gen_mxu_d16mul(ctx, op1);
> break;
>
> + case OPC_MXU_D16MAC:
> + gen_mxu_d16mac(ctx, op1);
> + break;
> +
> case OPC_CLO:
> case OPC_CLZ:
> check_insn(ctx, ISA_MIPS32);
© 2016 - 2025 Red Hat, Inc.