[RFC PATCH 28/42] target/mips/tx79: Move RDHWR usermode kludge to trans_SQ()

Philippe Mathieu-Daudé posted 42 patches 4 years, 9 months ago
There is a newer version of this series
[RFC PATCH 28/42] target/mips/tx79: Move RDHWR usermode kludge to trans_SQ()
Posted by Philippe Mathieu-Daudé 4 years, 9 months ago
Now than SQ is properly implemented, we can move the RDHWR
kludge required to have usermode working with recent glibc.

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
---
 target/mips/translate.c      | 56 ------------------------------------
 target/mips/tx79_translate.c | 34 +++++++++++++++++++++-
 2 files changed, 33 insertions(+), 57 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index c1d07a4591d..0fa2b3bcc15 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -1780,7 +1780,6 @@ enum {
 
 enum {
     MMI_OPC_CLASS_MMI = 0x1C << 26,    /* Same as OPC_SPECIAL2 */
-    MMI_OPC_SQ        = 0x1F << 26,    /* Same as OPC_SPECIAL3 */
 };
 
 /*
@@ -27330,53 +27329,6 @@ static void decode_mmi(CPUMIPSState *env, DisasContext *ctx)
     }
 }
 
-static void gen_mmi_sq(DisasContext *ctx, int base, int rt, int offset)
-{
-    gen_reserved_instruction(ctx);    /* TODO: MMI_OPC_SQ */
-}
-
-/*
- * The TX79-specific instruction Store Quadword
- *
- * +--------+-------+-------+------------------------+
- * | 011111 |  base |   rt  |           offset       | SQ
- * +--------+-------+-------+------------------------+
- *      6       5       5                 16
- *
- * has the same opcode as the Read Hardware Register instruction
- *
- * +--------+-------+-------+-------+-------+--------+
- * | 011111 | 00000 |   rt  |   rd  | 00000 | 111011 | RDHWR
- * +--------+-------+-------+-------+-------+--------+
- *      6       5       5       5       5        6
- *
- * that is required, trapped and emulated by the Linux kernel. However, all
- * RDHWR encodings yield address error exceptions on the TX79 since the SQ
- * offset is odd. Therefore all valid SQ instructions can execute normally.
- * In user mode, QEMU must verify the upper and lower 11 bits to distinguish
- * between SQ and RDHWR, as the Linux kernel does.
- */
-static void decode_mmi_sq(CPUMIPSState *env, DisasContext *ctx)
-{
-    int base = extract32(ctx->opcode, 21, 5);
-    int rt = extract32(ctx->opcode, 16, 5);
-    int offset = extract32(ctx->opcode, 0, 16);
-
-#ifdef CONFIG_USER_ONLY
-    uint32_t op1 = MASK_SPECIAL3(ctx->opcode);
-    uint32_t op2 = extract32(ctx->opcode, 6, 5);
-
-    if (base == 0 && op2 == 0 && op1 == OPC_RDHWR) {
-        int rd = extract32(ctx->opcode, 11, 5);
-
-        gen_rdhwr(ctx, rt, rd, 0);
-        return;
-    }
-#endif
-
-    gen_mmi_sq(ctx, base, rt, offset);
-}
-
 #endif
 
 static void decode_opc_special3(CPUMIPSState *env, DisasContext *ctx)
@@ -27561,15 +27513,7 @@ static bool decode_opc_legacy(CPUMIPSState *env, DisasContext *ctx)
         }
         break;
     case OPC_SPECIAL3:
-#if defined(TARGET_MIPS64)
-        if (ctx->insn_flags & INSN_R5900) {
-            decode_mmi_sq(env, ctx);    /* MMI_OPC_SQ */
-        } else {
-            decode_opc_special3(env, ctx);
-        }
-#else
         decode_opc_special3(env, ctx);
-#endif
         break;
     case OPC_REGIMM:
         op1 = MASK_REGIMM(ctx->opcode);
diff --git a/target/mips/tx79_translate.c b/target/mips/tx79_translate.c
index 386bae7808b..2aa3182d21d 100644
--- a/target/mips/tx79_translate.c
+++ b/target/mips/tx79_translate.c
@@ -411,7 +411,7 @@ static bool trans_LQ(DisasContext *ctx, arg_itype *a)
     return true;
 }
 
-static bool trans_SQ(DisasContext *ctx, arg_itype *a)
+static bool trans_SQ_real(DisasContext *ctx, arg_itype *a)
 {
     TCGv_i64 t0 = tcg_temp_new_i64();
     TCGv addr = tcg_temp_new();
@@ -438,6 +438,38 @@ static bool trans_SQ(DisasContext *ctx, arg_itype *a)
     return true;
 }
 
+static bool trans_SQ(DisasContext *ctx, arg_itype *a)
+{
+    /*
+     * The TX79-specific instruction Store Quadword
+     *
+     * +--------+-------+-------+------------------------+
+     * | 011111 |  base |   rt  |           offset       | SQ
+     * +--------+-------+-------+------------------------+
+     *      6       5       5                 16
+     *
+     * has the same opcode as the Read Hardware Register instruction
+     *
+     * +--------+-------+-------+-------+-------+--------+
+     * | 011111 | 00000 |   rt  |   rd  | 00000 | 111011 | RDHWR
+     * +--------+-------+-------+-------+-------+--------+
+     *      6       5       5       5       5        6
+     *
+     * that is required, trapped and emulated by the Linux kernel. However, all
+     * RDHWR encodings yield address error exceptions on the TX79 since the SQ
+     * offset is odd. Therefore all valid SQ instructions can execute normally.
+     * In user mode, QEMU must verify the upper and lower 13 bits to distinguish
+     * between SQ and RDHWR, as the Linux kernel does.
+     */
+#if defined(CONFIG_USER_ONLY)
+    if (!a->base && extract32(a->offset, 0, 11) == 0b00000111011) {
+        gen_rdhwr(ctx, a->rt, extract32(ctx->opcode, 11, 5), 0);
+        return true;
+    }
+#endif
+    return trans_SQ_real(ctx, a);
+}
+
 /*
  *     Multiply and Divide (19 instructions)
  *     -------------------------------------
-- 
2.26.2

Re: [RFC PATCH 28/42] target/mips/tx79: Move RDHWR usermode kludge to trans_SQ()
Posted by Richard Henderson 4 years, 9 months ago
On 2/14/21 9:58 AM, Philippe Mathieu-Daudé wrote:
> Now than SQ is properly implemented, we can move the RDHWR
> kludge required to have usermode working with recent glibc.
> 
> Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
> ---
>  target/mips/translate.c      | 56 ------------------------------------
>  target/mips/tx79_translate.c | 34 +++++++++++++++++++++-
>  2 files changed, 33 insertions(+), 57 deletions(-)
> 
> diff --git a/target/mips/translate.c b/target/mips/translate.c
> index c1d07a4591d..0fa2b3bcc15 100644
> --- a/target/mips/translate.c
> +++ b/target/mips/translate.c
> @@ -1780,7 +1780,6 @@ enum {
>  
>  enum {
>      MMI_OPC_CLASS_MMI = 0x1C << 26,    /* Same as OPC_SPECIAL2 */
> -    MMI_OPC_SQ        = 0x1F << 26,    /* Same as OPC_SPECIAL3 */
>  };
>  
>  /*
> @@ -27330,53 +27329,6 @@ static void decode_mmi(CPUMIPSState *env, DisasContext *ctx)
>      }
>  }
>  
> -static void gen_mmi_sq(DisasContext *ctx, int base, int rt, int offset)
> -{
> -    gen_reserved_instruction(ctx);    /* TODO: MMI_OPC_SQ */
> -}
> -
> -/*
> - * The TX79-specific instruction Store Quadword
> - *
> - * +--------+-------+-------+------------------------+
> - * | 011111 |  base |   rt  |           offset       | SQ
> - * +--------+-------+-------+------------------------+
> - *      6       5       5                 16
> - *
> - * has the same opcode as the Read Hardware Register instruction
> - *
> - * +--------+-------+-------+-------+-------+--------+
> - * | 011111 | 00000 |   rt  |   rd  | 00000 | 111011 | RDHWR
> - * +--------+-------+-------+-------+-------+--------+
> - *      6       5       5       5       5        6
> - *
> - * that is required, trapped and emulated by the Linux kernel. However, all
> - * RDHWR encodings yield address error exceptions on the TX79 since the SQ
> - * offset is odd. Therefore all valid SQ instructions can execute normally.
> - * In user mode, QEMU must verify the upper and lower 11 bits to distinguish
> - * between SQ and RDHWR, as the Linux kernel does.
> - */
> -static void decode_mmi_sq(CPUMIPSState *env, DisasContext *ctx)
> -{
> -    int base = extract32(ctx->opcode, 21, 5);
> -    int rt = extract32(ctx->opcode, 16, 5);
> -    int offset = extract32(ctx->opcode, 0, 16);
> -
> -#ifdef CONFIG_USER_ONLY
> -    uint32_t op1 = MASK_SPECIAL3(ctx->opcode);
> -    uint32_t op2 = extract32(ctx->opcode, 6, 5);
> -
> -    if (base == 0 && op2 == 0 && op1 == OPC_RDHWR) {
> -        int rd = extract32(ctx->opcode, 11, 5);
> -
> -        gen_rdhwr(ctx, rt, rd, 0);
> -        return;
> -    }
> -#endif
> -
> -    gen_mmi_sq(ctx, base, rt, offset);
> -}
> -
>  #endif
>  
>  static void decode_opc_special3(CPUMIPSState *env, DisasContext *ctx)
> @@ -27561,15 +27513,7 @@ static bool decode_opc_legacy(CPUMIPSState *env, DisasContext *ctx)
>          }
>          break;
>      case OPC_SPECIAL3:
> -#if defined(TARGET_MIPS64)
> -        if (ctx->insn_flags & INSN_R5900) {
> -            decode_mmi_sq(env, ctx);    /* MMI_OPC_SQ */
> -        } else {
> -            decode_opc_special3(env, ctx);
> -        }
> -#else
>          decode_opc_special3(env, ctx);
> -#endif
>          break;
>      case OPC_REGIMM:
>          op1 = MASK_REGIMM(ctx->opcode);
> diff --git a/target/mips/tx79_translate.c b/target/mips/tx79_translate.c
> index 386bae7808b..2aa3182d21d 100644
> --- a/target/mips/tx79_translate.c
> +++ b/target/mips/tx79_translate.c
> @@ -411,7 +411,7 @@ static bool trans_LQ(DisasContext *ctx, arg_itype *a)
>      return true;
>  }
>  
> -static bool trans_SQ(DisasContext *ctx, arg_itype *a)
> +static bool trans_SQ_real(DisasContext *ctx, arg_itype *a)
>  {
>      TCGv_i64 t0 = tcg_temp_new_i64();
>      TCGv addr = tcg_temp_new();
> @@ -438,6 +438,38 @@ static bool trans_SQ(DisasContext *ctx, arg_itype *a)
>      return true;
>  }
>  
> +static bool trans_SQ(DisasContext *ctx, arg_itype *a)
> +{
> +    /*
> +     * The TX79-specific instruction Store Quadword
> +     *
> +     * +--------+-------+-------+------------------------+
> +     * | 011111 |  base |   rt  |           offset       | SQ
> +     * +--------+-------+-------+------------------------+
> +     *      6       5       5                 16
> +     *
> +     * has the same opcode as the Read Hardware Register instruction
> +     *
> +     * +--------+-------+-------+-------+-------+--------+
> +     * | 011111 | 00000 |   rt  |   rd  | 00000 | 111011 | RDHWR
> +     * +--------+-------+-------+-------+-------+--------+
> +     *      6       5       5       5       5        6
> +     *
> +     * that is required, trapped and emulated by the Linux kernel. However, all
> +     * RDHWR encodings yield address error exceptions on the TX79 since the SQ
> +     * offset is odd.

Not that it's odd (the final address is masked, remember), but that it a store
to an address in the zero page.

> Therefore all valid SQ instructions can execute normally.
> +     * In user mode, QEMU must verify the upper and lower 13 bits to distinguish

11 bits.

> +     * between SQ and RDHWR, as the Linux kernel does.
> +     */
> +#if defined(CONFIG_USER_ONLY)
> +    if (!a->base && extract32(a->offset, 0, 11) == 0b00000111011) {
> +        gen_rdhwr(ctx, a->rt, extract32(ctx->opcode, 11, 5), 0);
> +        return true;
> +    }
> +#endif

I would do this as

{
  RDHWR_user  011111 00000 ..... ..... 00000 111011   @rd_rt
  SQ          011111 ..... ..... ................     @ldst
}

static bool trans_RDHWR_user(DisasContext *ctx, arg_rtype *a)
{
#ifdef CONFIG_USER_ONLY
    gen_rdhwr(ctx, a->rt, a->rd, 0);
    return true;
#else
    return false;
#endif
}


r~

Re: [RFC PATCH 28/42] target/mips/tx79: Move RDHWR usermode kludge to trans_SQ()
Posted by Fredrik Noring 4 years, 9 months ago
On Mon, Feb 15, 2021 at 01:01:52PM -0800, Richard Henderson wrote:
> On 2/14/21 9:58 AM, Philippe Mathieu-Daudé wrote:
> > +    /*
> > +     * The TX79-specific instruction Store Quadword
> > +     *
> > +     * +--------+-------+-------+------------------------+
> > +     * | 011111 |  base |   rt  |           offset       | SQ
> > +     * +--------+-------+-------+------------------------+
> > +     *      6       5       5                 16
> > +     *
> > +     * has the same opcode as the Read Hardware Register instruction
> > +     *
> > +     * +--------+-------+-------+-------+-------+--------+
> > +     * | 011111 | 00000 |   rt  |   rd  | 00000 | 111011 | RDHWR
> > +     * +--------+-------+-------+-------+-------+--------+
> > +     *      6       5       5       5       5        6
> > +     *
> > +     * that is required, trapped and emulated by the Linux kernel. However, all
> > +     * RDHWR encodings yield address error exceptions on the TX79 since the SQ
> > +     * offset is odd.
> 
> Not that it's odd (the final address is masked, remember), but that it a store
> to an address in the zero page.

The address always resolves to 0xffffe83b (then masked) in 32-bit KSEG2,
because rt is always $3 and rd is always $29 so -6085(zero), hence the
last page (which is much better) rather than the first, as Maciej
discovered:

https://patchwork.kernel.org/comment/23824173/

Other possible RDHWR encodings are no longer used, and can therefore be
ignored and revert to SQ:

https://patchwork.kernel.org/comment/23842167/

Notice also the oddity of 32-bit R5900 addresses that doesn't matter here
but has implications for n32 and system emulation.

> I would do this as
> 
> {
>   RDHWR_user  011111 00000 ..... ..... 00000 111011   @rd_rt
>   SQ          011111 ..... ..... ................     @ldst
> }

Both rd and rt have fixed values, as mentioned.

For reference, RDHWR is currently done like this in the Linux kernel:

	if (IS_ENABLED(CONFIG_CPU_R5900)) {
		/*
		 * On the R5900, a valid RDHWR instruction
		 *
		 *     +--------+-------+----+----+-------+--------+
		 *     | 011111 | 00000 | rt | rd | 00000 | 111011 |
		 *     +--------+-------+----+----+-------+--------+
		 *          6       5      5    5     5        6
		 *
		 * having rt $3 (v1) and rd $29 (MIPS_HWR_ULR) is
		 * interpreted as the R5900 specific SQ instruction
		 *
		 *     +--------+-------+----+---------------------+
		 *     | 011111 |  base | rt |        offset       |
		 *     +--------+-------+----+---------------------+
		 *          6       5      5            16
		 *
		 * with
		 *
		 *     sq v1,-6085(zero)
		 *
		 * that asserts an address exception since -6085(zero)
		 * always resolves to 0xffffe83b in 32-bit KSEG2.
		 *
		 * Other legacy values of rd, such as MIPS_HWR_CPUNUM,
		 * are ignored.
		 */
		if (insn.r_format.func == rdhwr_op &&
		    insn.r_format.rd == MIPS_HWR_ULR &&
		    insn.r_format.rt == 3 &&
		    insn.r_format.rs == 0 &&
		    insn.r_format.re == 0) {
			if (compute_return_epc(regs) < 0 ||
			    simulate_rdhwr(regs, insn.r_format.rd,
					   insn.r_format.rt) < 0)
				goto sigill;
			return;
		}
		goto sigbus;
	} else ...

Fredrik

Re: [RFC PATCH 28/42] target/mips/tx79: Move RDHWR usermode kludge to trans_SQ()
Posted by Maciej W. Rozycki 4 years, 9 months ago
On Tue, 16 Feb 2021, Fredrik Noring wrote:

> > Not that it's odd (the final address is masked, remember), but that it a store
> > to an address in the zero page.
> 
> The address always resolves to 0xffffe83b (then masked) in 32-bit KSEG2,
> because rt is always $3 and rd is always $29 so -6085(zero), hence the
> last page (which is much better) rather than the first, as Maciej
> discovered:
> 
> https://patchwork.kernel.org/comment/23824173/
> 
> Other possible RDHWR encodings are no longer used, and can therefore be
> ignored and revert to SQ:
> 
> https://patchwork.kernel.org/comment/23842167/

 Or rather were never used in the general case (I can't rule out someone 
using that stuff for something, but I wouldn't call it supported; I used 
some of it internally while evaluating the speed of RDHWR emulation before 
the use of $3 or indeed RDHWR was settled in the TLS psABI, though the 
actual code that ultimately went into Linux was developed independently).

> > I would do this as
> > 
> > {
> >   RDHWR_user  011111 00000 ..... ..... 00000 111011   @rd_rt
> >   SQ          011111 ..... ..... ................     @ldst
> > }
> 
> Both rd and rt have fixed values, as mentioned.

 I would suggest actually supporting variable `rt', see below.  Would it 
be a problem?

> For reference, RDHWR is currently done like this in the Linux kernel:
> 
> 	if (IS_ENABLED(CONFIG_CPU_R5900)) {
> 		/*
> 		 * On the R5900, a valid RDHWR instruction
> 		 *
> 		 *     +--------+-------+----+----+-------+--------+
> 		 *     | 011111 | 00000 | rt | rd | 00000 | 111011 |
> 		 *     +--------+-------+----+----+-------+--------+
> 		 *          6       5      5    5     5        6
> 		 *
> 		 * having rt $3 (v1) and rd $29 (MIPS_HWR_ULR) is
> 		 * interpreted as the R5900 specific SQ instruction
> 		 *
> 		 *     +--------+-------+----+---------------------+
> 		 *     | 011111 |  base | rt |        offset       |
> 		 *     +--------+-------+----+---------------------+
> 		 *          6       5      5            16
> 		 *
> 		 * with
> 		 *
> 		 *     sq v1,-6085(zero)
> 		 *
> 		 * that asserts an address exception since -6085(zero)
> 		 * always resolves to 0xffffe83b in 32-bit KSEG2.
> 		 *
> 		 * Other legacy values of rd, such as MIPS_HWR_CPUNUM,
> 		 * are ignored.
> 		 */
> 		if (insn.r_format.func == rdhwr_op &&
> 		    insn.r_format.rd == MIPS_HWR_ULR &&
> 		    insn.r_format.rt == 3 &&

 I suggest leaving the `rt' check out for consistency, as changing the 
register to read the value of UserLocal into from psABI-mandated $3 does 
not cause any issue with the R5900 (the `rt' field overlaps between both 
machine instructions, so the encoding placed there does not affect the 
KSEG2 access trap caused) and those encodings are also emulated in the 
slow path for other legacy ISA CPUs:

	case MIPS_HWR_ULR:		/* Read UserLocal register */
		regs->regs[rt] = ti->tp_value;
		return 0;


 So e.g. `rdhwr $25, $29' is interpreted as `sq $25,-6085($0)' by the 
R5900 => no issue, it still traps.

 I know I have previously written that we can ignore `rt' encodings other 
than $3, but they are harmless and handling them saves a couple of machine 
instructions needed to make the check, so I think while we can, we do not 
actually have to ignore them.

> 		    insn.r_format.rs == 0 &&
> 		    insn.r_format.re == 0) {
> 			if (compute_return_epc(regs) < 0 ||
> 			    simulate_rdhwr(regs, insn.r_format.rd,
> 					   insn.r_format.rt) < 0)
> 				goto sigill;
> 			return;
> 		}
> 		goto sigbus;
> 	} else ...

 Code continuation quoted left for reference.

  Maciej

Re: [RFC PATCH 28/42] target/mips/tx79: Move RDHWR usermode kludge to trans_SQ()
Posted by Fredrik Noring 4 years, 9 months ago
On Tue, Feb 16, 2021 at 01:21:34PM +0100, Maciej W. Rozycki wrote:
> > > I would do this as
> > > 
> > > {
> > >   RDHWR_user  011111 00000 ..... ..... 00000 111011   @rd_rt
> > >   SQ          011111 ..... ..... ................     @ldst
> > > }
> > 
> > Both rd and rt have fixed values, as mentioned.
> 
>  I would suggest actually supporting variable `rt', see below.  Would it 
> be a problem?

No problem on my part, I'll update the kernel code!

Fredrik