[PATCH v5 16/57] tcg/tci: Clean up deposit operations

Richard Henderson posted 57 patches 4 years, 11 months ago
Maintainers: "Alex Bennée" <alex.bennee@linaro.org>, "Philippe Mathieu-Daudé" <f4bug@amsat.org>, Laurent Vivier <laurent@vivier.eu>, David Gibson <david@gibson.dropbear.id.au>, Greg Kurz <groug@kaod.org>, Paolo Bonzini <pbonzini@redhat.com>, Willian Rampazzo <willianr@redhat.com>, Eduardo Habkost <ehabkost@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, Wainer dos Santos Moschetta <wainersm@redhat.com>, Thomas Huth <thuth@redhat.com>, Stefan Weil <sw@weilnetz.de>
There is a newer version of this series
[PATCH v5 16/57] tcg/tci: Clean up deposit operations
Posted by Richard Henderson 4 years, 11 months ago
Use the correct set of asserts during code generation.
We do not require the first input to overlap the output;
the existing interpreter already supported that.

Split out tci_args_rrrbb in the interpreter (tcg/tci.c).
Use the deposit32/64 functions rather than inline expansion.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tci/tcg-target-con-set.h |  1 -
 tcg/tci.c                    | 33 ++++++++++++++++-----------------
 tcg/tci/tcg-target.c.inc     | 24 ++++++++++++++----------
 3 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/tcg/tci/tcg-target-con-set.h b/tcg/tci/tcg-target-con-set.h
index f51b7bcb13..316730f32c 100644
--- a/tcg/tci/tcg-target-con-set.h
+++ b/tcg/tci/tcg-target-con-set.h
@@ -13,7 +13,6 @@ C_O0_I2(r, r)
 C_O0_I3(r, r, r)
 C_O0_I4(r, r, r, r)
 C_O1_I1(r, r)
-C_O1_I2(r, 0, r)
 C_O1_I2(r, r, r)
 C_O1_I4(r, r, r, r, r)
 C_O2_I1(r, r, r)
diff --git a/tcg/tci.c b/tcg/tci.c
index 10f58e4f25..3ce2b72316 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -168,6 +168,7 @@ static tcg_target_ulong tci_read_label(const uint8_t **tb_ptr)
  *   tci_args_<arguments>
  * where arguments is a sequence of
  *
+ *   b = immediate (bit position)
  *   i = immediate (uint32_t)
  *   I = immediate (tcg_target_ulong)
  *   r = register
@@ -236,6 +237,16 @@ static void tci_args_rrrc(const uint8_t **tb_ptr,
     *c3 = tci_read_b(tb_ptr);
 }
 
+static void tci_args_rrrbb(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1,
+                           TCGReg *r2, uint8_t *i3, uint8_t *i4)
+{
+    *r0 = tci_read_r(tb_ptr);
+    *r1 = tci_read_r(tb_ptr);
+    *r2 = tci_read_r(tb_ptr);
+    *i3 = tci_read_b(tb_ptr);
+    *i4 = tci_read_b(tb_ptr);
+}
+
 #if TCG_TARGET_REG_BITS == 32
 static void tci_args_rrrr(const uint8_t **tb_ptr,
                           TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGReg *r3)
@@ -432,11 +443,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
         TCGReg r0, r1, r2;
         tcg_target_ulong t0;
         tcg_target_ulong t1;
-        tcg_target_ulong t2;
         TCGCond condition;
         target_ulong taddr;
-        uint8_t tmp8;
-        uint16_t tmp16;
+        uint8_t pos, len;
         uint32_t tmp32;
         uint64_t tmp64;
 #if TCG_TARGET_REG_BITS == 32
@@ -627,13 +636,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
 #endif
 #if TCG_TARGET_HAS_deposit_i32
         case INDEX_op_deposit_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_rval(regs, &tb_ptr);
-            t2 = tci_read_rval(regs, &tb_ptr);
-            tmp16 = *tb_ptr++;
-            tmp8 = *tb_ptr++;
-            tmp32 = (((1 << tmp8) - 1) << tmp16);
-            tci_write_reg(regs, t0, (t1 & ~tmp32) | ((t2 << tmp16) & tmp32));
+            tci_args_rrrbb(&tb_ptr, &r0, &r1, &r2, &pos, &len);
+            regs[r0] = deposit32(regs[r1], pos, len, regs[r2]);
             break;
 #endif
         case INDEX_op_brcond_i32:
@@ -789,13 +793,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
 #endif
 #if TCG_TARGET_HAS_deposit_i64
         case INDEX_op_deposit_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_rval(regs, &tb_ptr);
-            t2 = tci_read_rval(regs, &tb_ptr);
-            tmp16 = *tb_ptr++;
-            tmp8 = *tb_ptr++;
-            tmp64 = (((1ULL << tmp8) - 1) << tmp16);
-            tci_write_reg(regs, t0, (t1 & ~tmp64) | ((t2 << tmp16) & tmp64));
+            tci_args_rrrbb(&tb_ptr, &r0, &r1, &r2, &pos, &len);
+            regs[r0] = deposit64(regs[r1], pos, len, regs[r2]);
             break;
 #endif
         case INDEX_op_brcond_i64:
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index 2c64b4f617..640407b4a8 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -126,11 +126,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_rotr_i64:
     case INDEX_op_setcond_i32:
     case INDEX_op_setcond_i64:
-        return C_O1_I2(r, r, r);
-
     case INDEX_op_deposit_i32:
     case INDEX_op_deposit_i64:
-        return C_O1_I2(r, 0, r);
+        return C_O1_I2(r, r, r);
 
     case INDEX_op_brcond_i32:
     case INDEX_op_brcond_i64:
@@ -480,13 +478,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         break;
 
     CASE_32_64(deposit)  /* Optional (TCG_TARGET_HAS_deposit_*). */
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        tcg_out_r(s, args[2]);
-        tcg_debug_assert(args[3] <= UINT8_MAX);
-        tcg_out8(s, args[3]);
-        tcg_debug_assert(args[4] <= UINT8_MAX);
-        tcg_out8(s, args[4]);
+        {
+            TCGArg pos = args[3], len = args[4];
+            TCGArg max = opc == INDEX_op_deposit_i32 ? 32 : 64;
+
+            tcg_debug_assert(pos < max);
+            tcg_debug_assert(pos + len <= max);
+
+            tcg_out_r(s, args[0]);
+            tcg_out_r(s, args[1]);
+            tcg_out_r(s, args[2]);
+            tcg_out8(s, pos);
+            tcg_out8(s, len);
+        }
         break;
 
     CASE_32_64(brcond)
-- 
2.25.1


Re: [PATCH v5 16/57] tcg/tci: Clean up deposit operations
Posted by Philippe Mathieu-Daudé 4 years, 11 months ago
On 3/11/21 3:39 PM, Richard Henderson wrote:
> Use the correct set of asserts during code generation.
> We do not require the first input to overlap the output;
> the existing interpreter already supported that.
> 
> Split out tci_args_rrrbb in the translator.
> Use the deposit32/64 functions rather than inline expansion.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  tcg/tci/tcg-target-con-set.h |  1 -
>  tcg/tci.c                    | 33 ++++++++++++++++-----------------
>  tcg/tci/tcg-target.c.inc     | 24 ++++++++++++++----------
>  3 files changed, 30 insertions(+), 28 deletions(-)
> 
> diff --git a/tcg/tci/tcg-target-con-set.h b/tcg/tci/tcg-target-con-set.h
> index f51b7bcb13..316730f32c 100644
> --- a/tcg/tci/tcg-target-con-set.h
> +++ b/tcg/tci/tcg-target-con-set.h
> @@ -13,7 +13,6 @@ C_O0_I2(r, r)
>  C_O0_I3(r, r, r)
>  C_O0_I4(r, r, r, r)
>  C_O1_I1(r, r)
> -C_O1_I2(r, 0, r)
>  C_O1_I2(r, r, r)
>  C_O1_I4(r, r, r, r, r)
>  C_O2_I1(r, r, r)
> diff --git a/tcg/tci.c b/tcg/tci.c
> index 10f58e4f25..3ce2b72316 100644
> --- a/tcg/tci.c
> +++ b/tcg/tci.c
> @@ -168,6 +168,7 @@ static tcg_target_ulong tci_read_label(const uint8_t **tb_ptr)
>   *   tci_args_<arguments>
>   * where arguments is a sequence of
>   *
> + *   b = immediate (bit position)
>   *   i = immediate (uint32_t)
>   *   I = immediate (tcg_target_ulong)
>   *   r = register
> @@ -236,6 +237,16 @@ static void tci_args_rrrc(const uint8_t **tb_ptr,
>      *c3 = tci_read_b(tb_ptr);
>  }
>  
> +static void tci_args_rrrbb(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1,
> +                           TCGReg *r2, uint8_t *i3, uint8_t *i4)
> +{
> +    *r0 = tci_read_r(tb_ptr);
> +    *r1 = tci_read_r(tb_ptr);
> +    *r2 = tci_read_r(tb_ptr);
> +    *i3 = tci_read_b(tb_ptr);
> +    *i4 = tci_read_b(tb_ptr);
> +}
> +
>  #if TCG_TARGET_REG_BITS == 32
>  static void tci_args_rrrr(const uint8_t **tb_ptr,
>                            TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGReg *r3)
> @@ -432,11 +443,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
>          TCGReg r0, r1, r2;
>          tcg_target_ulong t0;
>          tcg_target_ulong t1;
> -        tcg_target_ulong t2;
>          TCGCond condition;
>          target_ulong taddr;
> -        uint8_t tmp8;
> -        uint16_t tmp16;
> +        uint8_t pos, len;
>          uint32_t tmp32;
>          uint64_t tmp64;
>  #if TCG_TARGET_REG_BITS == 32
> @@ -627,13 +636,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
>  #endif
>  #if TCG_TARGET_HAS_deposit_i32
>          case INDEX_op_deposit_i32:
> -            t0 = *tb_ptr++;
> -            t1 = tci_read_rval(regs, &tb_ptr);
> -            t2 = tci_read_rval(regs, &tb_ptr);
> -            tmp16 = *tb_ptr++;
> -            tmp8 = *tb_ptr++;
> -            tmp32 = (((1 << tmp8) - 1) << tmp16);
> -            tci_write_reg(regs, t0, (t1 & ~tmp32) | ((t2 << tmp16) & tmp32));
> +            tci_args_rrrbb(&tb_ptr, &r0, &r1, &r2, &pos, &len);
> +            regs[r0] = deposit32(regs[r1], pos, len, regs[r2]);
>              break;
>  #endif
>          case INDEX_op_brcond_i32:
> @@ -789,13 +793,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
>  #endif
>  #if TCG_TARGET_HAS_deposit_i64
>          case INDEX_op_deposit_i64:
> -            t0 = *tb_ptr++;
> -            t1 = tci_read_rval(regs, &tb_ptr);
> -            t2 = tci_read_rval(regs, &tb_ptr);
> -            tmp16 = *tb_ptr++;
> -            tmp8 = *tb_ptr++;
> -            tmp64 = (((1ULL << tmp8) - 1) << tmp16);
> -            tci_write_reg(regs, t0, (t1 & ~tmp64) | ((t2 << tmp16) & tmp64));
> +            tci_args_rrrbb(&tb_ptr, &r0, &r1, &r2, &pos, &len);
> +            regs[r0] = deposit64(regs[r1], pos, len, regs[r2]);
>              break;
>  #endif
>          case INDEX_op_brcond_i64:
> diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
> index 2c64b4f617..640407b4a8 100644
> --- a/tcg/tci/tcg-target.c.inc
> +++ b/tcg/tci/tcg-target.c.inc
> @@ -126,11 +126,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
>      case INDEX_op_rotr_i64:
>      case INDEX_op_setcond_i32:
>      case INDEX_op_setcond_i64:
> -        return C_O1_I2(r, r, r);
> -
>      case INDEX_op_deposit_i32:
>      case INDEX_op_deposit_i64:
> -        return C_O1_I2(r, 0, r);
> +        return C_O1_I2(r, r, r);
>  
>      case INDEX_op_brcond_i32:
>      case INDEX_op_brcond_i64:
> @@ -480,13 +478,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
>          break;
>  
>      CASE_32_64(deposit)  /* Optional (TCG_TARGET_HAS_deposit_*). */
> -        tcg_out_r(s, args[0]);
> -        tcg_out_r(s, args[1]);
> -        tcg_out_r(s, args[2]);
> -        tcg_debug_assert(args[3] <= UINT8_MAX);
> -        tcg_out8(s, args[3]);
> -        tcg_debug_assert(args[4] <= UINT8_MAX);
> -        tcg_out8(s, args[4]);
> +        {
> +            TCGArg pos = args[3], len = args[4];
> +            TCGArg max = opc == INDEX_op_deposit_i32 ? 32 : 64;
> +
> +            tcg_debug_assert(pos < max);
> +            tcg_debug_assert(pos + len <= max);
> +
> +            tcg_out_r(s, args[0]);
> +            tcg_out_r(s, args[1]);
> +            tcg_out_r(s, args[2]);
> +            tcg_out8(s, pos);
> +            tcg_out8(s, len);
> +        }
>          break;
>  
>      CASE_32_64(brcond)
> 

Another KISS :)

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>