If a 32-bit input is not properly nanboxed, then the input is replaced
with the default qnan. The only inline expansion is for the sign-changing
set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/riscv/insn_trans/trans_rvf.inc.c | 71 +++++++++++++++++++------
target/riscv/translate.c | 18 +++++++
2 files changed, 73 insertions(+), 16 deletions(-)
diff --git a/target/riscv/insn_trans/trans_rvf.inc.c b/target/riscv/insn_trans/trans_rvf.inc.c
index 264d3139f1..f9a9e0643a 100644
--- a/target/riscv/insn_trans/trans_rvf.inc.c
+++ b/target/riscv/insn_trans/trans_rvf.inc.c
@@ -161,47 +161,86 @@ static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a)
{
REQUIRE_FPU;
REQUIRE_EXT(ctx, RVF);
+
if (a->rs1 == a->rs2) { /* FMOV */
- tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
+ gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
} else { /* FSGNJ */
- tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rs2], cpu_fpr[a->rs1],
- 0, 31);
+ TCGv_i64 rs1 = tcg_temp_new_i64();
+ TCGv_i64 rs2 = tcg_temp_new_i64();
+
+ gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
+ gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
+
+ /* This formulation retains the nanboxing of rs2. */
+ tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31);
+ tcg_temp_free_i64(rs1);
+ tcg_temp_free_i64(rs2);
}
- gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
mark_fs_dirty(ctx);
return true;
}
static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
{
+ TCGv_i64 rs1, rs2, mask;
+
REQUIRE_FPU;
REQUIRE_EXT(ctx, RVF);
+
+ rs1 = tcg_temp_new_i64();
+ gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
+
if (a->rs1 == a->rs2) { /* FNEG */
- tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], INT32_MIN);
+ tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31, 1));
} else {
- TCGv_i64 t0 = tcg_temp_new_i64();
- tcg_gen_not_i64(t0, cpu_fpr[a->rs2]);
- tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1], 0, 31);
- tcg_temp_free_i64(t0);
+ rs2 = tcg_temp_new_i64();
+ gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
+
+ /*
+ * Replace bit 31 in rs1 with inverse in rs2.
+ * This formulation retains the nanboxing of rs1.
+ */
+ mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));
+ tcg_gen_andc_i64(rs2, mask, rs2);
+ tcg_gen_and_i64(rs1, mask, rs1);
+ tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);
+
+ tcg_temp_free_i64(mask);
+ tcg_temp_free_i64(rs2);
}
- gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
+ tcg_temp_free_i64(rs1);
+
mark_fs_dirty(ctx);
return true;
}
static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a)
{
+ TCGv_i64 rs1, rs2;
+
REQUIRE_FPU;
REQUIRE_EXT(ctx, RVF);
+
+ rs1 = tcg_temp_new_i64();
+ gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
+
if (a->rs1 == a->rs2) { /* FABS */
- tcg_gen_andi_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], ~INT32_MIN);
+ tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(31, 1));
} else {
- TCGv_i64 t0 = tcg_temp_new_i64();
- tcg_gen_andi_i64(t0, cpu_fpr[a->rs2], INT32_MIN);
- tcg_gen_xor_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], t0);
- tcg_temp_free_i64(t0);
+ rs2 = tcg_temp_new_i64();
+ gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
+
+ /*
+ * Xor bit 31 in rs1 with that in rs2.
+ * This formulation retains the nanboxing of rs1.
+ */
+ tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1));
+ tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2);
+
+ tcg_temp_free_i64(rs2);
}
- gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
+ tcg_temp_free_i64(rs1);
+
mark_fs_dirty(ctx);
return true;
}
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 12a746da97..bf35182776 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -101,6 +101,24 @@ static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)
tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));
}
+/*
+ * A narrow n-bit operation, where n < FLEN, checks that input operands
+ * are correctly Nan-boxed, i.e., all upper FLEN - n bits are 1.
+ * If so, the least-significant bits of the input are used, otherwise the
+ * input value is treated as an n-bit canonical NaN (v2.2 section 9.2).
+ *
+ * Here, the result is always nan-boxed, even the canonical nan.
+ */
+static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in)
+{
+ TCGv_i64 t_max = tcg_const_i64(0xffffffff00000000ull);
+ TCGv_i64 t_nan = tcg_const_i64(0xffffffff7fc00000ull);
+
+ tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan);
+ tcg_temp_free_i64(t_max);
+ tcg_temp_free_i64(t_nan);
+}
+
static void generate_exception(DisasContext *ctx, int excp)
{
tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
--
2.25.1
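As an aside for readers less familiar with the movcond idiom used in
gen_check_nanbox_s above, a minimal plain-C model of the check might look
like the sketch below; the standalone helper name and the sample values are
illustrative only and are not part of the patch.

    #include <stdint.h>
    #include <assert.h>

    /* A value is properly nan-boxed iff its upper 32 bits are all ones,
     * i.e. iff it compares >= 0xffffffff00000000 as an unsigned 64-bit
     * integer (the TCG_COND_GEU comparison).  Anything else is replaced
     * by the nan-boxed single-precision default qnan, 0x7fc00000. */
    static uint64_t check_nanbox_s(uint64_t in)
    {
        const uint64_t t_max = 0xffffffff00000000ull;
        const uint64_t t_nan = 0xffffffff7fc00000ull;

        return in >= t_max ? in : t_nan;
    }

    int main(void)
    {
        assert(check_nanbox_s(0xffffffff3f800000ull) == 0xffffffff3f800000ull); /* boxed 1.0f kept */
        assert(check_nanbox_s(0x000000003f800000ull) == 0xffffffff7fc00000ull); /* unboxed -> qnan */
        return 0;
    }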
On 2020/7/24 8:28, Richard Henderson wrote:
> If a 32-bit input is not properly nanboxed, then the input is replaced
> with the default qnan. The only inline expansion is for the sign-changing
> set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.
Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
Zhiwei
On Fri, Jul 24, 2020 at 8:28 AM Richard Henderson <richard.henderson@linaro.org> wrote:
> If a 32-bit input is not properly nanboxed, then the input is replaced
> with the default qnan. The only inline expansion is for the sign-changing
> set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.
Reviewed-by: Chih-Min Chao <chihmin.chao@sifive.com>
Chih-Min Chao
On Fri, Jul 24, 2020 at 8:28 AM Richard Henderson <richard.henderson@linaro.org> wrote:
> static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
> {
> + TCGv_i64 rs1, rs2, mask;
> +
> REQUIRE_FPU;
> REQUIRE_EXT(ctx, RVF);
> +
> + rs1 = tcg_temp_new_i64();
> + gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
> +
> if (a->rs1 == a->rs2) { /* FNEG */
> - tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], INT32_MIN);
> + tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31, 1));
> } else {
> - TCGv_i64 t0 = tcg_temp_new_i64();
> - tcg_gen_not_i64(t0, cpu_fpr[a->rs2]);
> - tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1], 0, 31);
> - tcg_temp_free_i64(t0);
> + rs2 = tcg_temp_new_i64();
> + gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
> +
> + /*
> + * Replace bit 31 in rs1 with inverse in rs2.
> + * This formulation retains the nanboxing of rs1.
> + */
> + mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));
> + tcg_gen_andc_i64(rs2, mask, rs2);
>
should be

    tcg_gen_not_i64(rs2, rs2);        // the inversion of rs2 was forgotten
    tcg_gen_andc_i64(rs2, rs2, mask); // mask needs to be inverted to keep only the sign bit
Chih-Min Chao
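To make the problem concrete, here is a small plain-C model of the disputed
hunk; the helper names and sample values are illustrative only, and the model
relies on tcg_gen_andc_i64(d, a, b) computing a & ~b.

    #include <stdint.h>
    #include <inttypes.h>
    #include <stdio.h>

    #define SIGN (1ull << 31)   /* MAKE_64BIT_MASK(31, 1) */
    #define MASK (~SIGN)        /* ~MAKE_64BIT_MASK(31, 1), the constant loaded into 'mask' */

    /* Intended FSGNJN.S: rs1 with bit 31 replaced by the inverse of rs2's
     * bit 31; the nan-box (upper 32 bits) comes from rs1. */
    static uint64_t fsgnjn_expected(uint64_t rs1, uint64_t rs2)
    {
        return (rs1 & MASK) | (~rs2 & SIGN);
    }

    /* What the patch generates: andc(rs2, mask, rs2) is mask & ~rs2, i.e.
     * every inverted bit of rs2 except bit 31, not just the inverted sign. */
    static uint64_t fsgnjn_as_patched(uint64_t rs1, uint64_t rs2)
    {
        rs2 = MASK & ~rs2;
        rs1 = MASK & rs1;
        return rs1 | rs2;
    }

    int main(void)
    {
        uint64_t rs1 = 0xffffffff3f800000ull;   /* nan-boxed +1.0f */
        uint64_t rs2 = 0xffffffff40000000ull;   /* nan-boxed +2.0f */

        /* expected: ffffffffbf800000 (-1.0f); as patched: ffffffff3fffffff */
        printf("expected:   %016" PRIx64 "\n", fsgnjn_expected(rs1, rs2));
        printf("as patched: %016" PRIx64 "\n", fsgnjn_as_patched(rs1, rs2));
        return 0;
    }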
On 2020/8/8 4:24, Chih-Min Chao wrote:
> On Fri, Jul 24, 2020 at 8:28 AM Richard Henderson <richard.henderson@linaro.org> wrote:
>
> If a 32-bit input is not properly nanboxed, then the input is replaced
> with the default qnan. The only inline expansion is for the
> sign-changing
> set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org
> <mailto:richard.henderson@linaro.org>>
> ---
> target/riscv/insn_trans/trans_rvf.inc.c | 71
> +++++++++++++++++++------
> target/riscv/translate.c | 18 +++++++
> 2 files changed, 73 insertions(+), 16 deletions(-)
>
> diff --git a/target/riscv/insn_trans/trans_rvf.inc.c
> b/target/riscv/insn_trans/trans_rvf.inc.c
> index 264d3139f1..f9a9e0643a 100644
> --- a/target/riscv/insn_trans/trans_rvf.inc.c
> +++ b/target/riscv/insn_trans/trans_rvf.inc.c
> @@ -161,47 +161,86 @@ static bool trans_fsgnj_s(DisasContext *ctx,
> arg_fsgnj_s *a)
> {
> REQUIRE_FPU;
> REQUIRE_EXT(ctx, RVF);
> +
> if (a->rs1 == a->rs2) { /* FMOV */
> - tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
> + gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
> } else { /* FSGNJ */
> - tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rs2],
> cpu_fpr[a->rs1],
> - 0, 31);
> + TCGv_i64 rs1 = tcg_temp_new_i64();
> + TCGv_i64 rs2 = tcg_temp_new_i64();
> +
> + gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
> + gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
> +
> + /* This formulation retains the nanboxing of rs2. */
> + tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31);
> + tcg_temp_free_i64(rs1);
> + tcg_temp_free_i64(rs2);
> }
> - gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
> mark_fs_dirty(ctx);
> return true;
> }
>
> static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
> {
> + TCGv_i64 rs1, rs2, mask;
> +
> REQUIRE_FPU;
> REQUIRE_EXT(ctx, RVF);
> +
> + rs1 = tcg_temp_new_i64();
> + gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
> +
> if (a->rs1 == a->rs2) { /* FNEG */
> - tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], INT32_MIN);
> + tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31,
> 1));
> } else {
> - TCGv_i64 t0 = tcg_temp_new_i64();
> - tcg_gen_not_i64(t0, cpu_fpr[a->rs2]);
> - tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1],
> 0, 31);
> - tcg_temp_free_i64(t0);
> + rs2 = tcg_temp_new_i64();
> + gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
> +
> + /*
> + * Replace bit 31 in rs1 with inverse in rs2.
> + * This formulation retains the nanboxing of rs1.
> + */
> + mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));
> + tcg_gen_andc_i64(rs2, mask, rs2);
>
>
> should be
>
>     tcg_gen_not_i64(rs2, rs2);        // the inversion of rs2 was forgotten
>     tcg_gen_andc_i64(rs2, rs2, mask); // mask needs to be inverted to keep only the sign bit
Hi Chih-Min,
Thanks for pointing it out. It is indeed a bug. However, I think it should be

    tcg_gen_andc_i64(rs2, rs2, mask); // keep only bit 31 of rs2
    tcg_gen_not_i64(rs2, rs2);        // then invert rs2
Best Regards,
Zhiwei
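A quick bit-level check (plain C, sample value illustrative) suggests this
ordering does not work either: masking first and inverting afterwards sets
every bit outside bit 31 as well, whereas inverting first and masking second
leaves only the inverted sign bit.

    #include <stdint.h>
    #include <assert.h>

    int main(void)
    {
        const uint64_t sign = 1ull << 31;       /* MAKE_64BIT_MASK(31, 1) */
        const uint64_t mask = ~sign;            /* the constant loaded into 'mask' */
        uint64_t rs2 = 0xffffffff40000000ull;   /* nan-boxed +2.0f, sign bit clear */

        /* andc first, then not: ~(rs2 & ~mask) -- every bit outside bit 31 ends
         * up set, so OR-ing this into rs1 corrupts the whole result. */
        assert(~(rs2 & ~mask) == ~0ull);

        /* not first, then andc: (~rs2) & ~mask -- only the inverted sign bit
         * survives, which is what FSGNJN.S needs. */
        assert(((~rs2) & ~mask) == sign);
        return 0;
    }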
On 2020/8/8 22:18, LIU Zhiwei wrote:
>
>
> On 2020/8/8 4:24, Chih-Min Chao wrote:
>> On Fri, Jul 24, 2020 at 8:28 AM Richard Henderson <richard.henderson@linaro.org> wrote:
>> + rs2 = tcg_temp_new_i64();
>> + gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
>> +
>> + /*
>> + * Replace bit 31 in rs1 with inverse in rs2.
>> + * This formulation retains the nanboxing of rs1.
>> + */
>> + mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));
>> + tcg_gen_andc_i64(rs2, mask, rs2);
>>
>>
>> should be
>>
>>     tcg_gen_not_i64(rs2, rs2);        // the inversion of rs2 was forgotten
>>     tcg_gen_andc_i64(rs2, rs2, mask); // mask needs to be inverted to keep only the sign bit
> Hi Chih-Min,
>
> Thanks for pointing it out. It is indeed a bug. However, I think it should be
>
>     tcg_gen_andc_i64(rs2, rs2, mask); // keep only bit 31 of rs2
>     tcg_gen_not_i64(rs2, rs2);        // then invert rs2
>
Hi Chih-Min,
Sorry, your code is right.
Zhiwei
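For completeness, with Chih-Min's ordering folded in, the else branch of
trans_fsgnjn_s would presumably end up something like the sketch below
(written against this patch; not a claim about the code that finally lands):

        rs2 = tcg_temp_new_i64();
        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);

        /*
         * Replace bit 31 in rs1 with inverse in rs2.
         * This formulation retains the nanboxing of rs1.
         */
        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));
        tcg_gen_not_i64(rs2, rs2);                 /* invert rs2 first ... */
        tcg_gen_andc_i64(rs2, rs2, mask);          /* ... then keep only the inverted sign bit */
        tcg_gen_and_i64(rs1, mask, rs1);           /* rs1 with bit 31 cleared, nan-box intact */
        tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);  /* inject the inverted sign into rs1 */

        tcg_temp_free_i64(mask);
        tcg_temp_free_i64(rs2);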