[PATCH v3 34/81] target/arm: Implement SVE2 WHILERW, WHILEWR

Richard Henderson posted 81 patches 5 years, 4 months ago
Maintainers: Peter Maydell <peter.maydell@linaro.org>
There is a newer version of this series
[PATCH v3 34/81] target/arm: Implement SVE2 WHILERW, WHILEWR
Posted by Richard Henderson 5 years, 4 months ago
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v2: Fix decodetree typo
---
 target/arm/sve.decode      |  3 ++
 target/arm/translate-sve.c | 62 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index b7038f9f57..19d503e2f4 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -702,6 +702,9 @@ CTERM           00100101 1 sf:1 1 rm:5 001000 rn:5 ne:1 0000
 # SVE integer compare scalar count and limit
 WHILE           00100101 esz:2 1 rm:5 000 sf:1 u:1 lt:1 rn:5 eq:1 rd:4
 
+# SVE2 pointer conflict compare
+WHILE_ptr       00100101 esz:2 1 rm:5 001 100 rn:5 rw:1 rd:4
+
 ### SVE Integer Wide Immediate - Unpredicated Group
 
 # SVE broadcast floating-point immediate (unpredicated)
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index f1bc4c63e6..d3241ce167 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -3227,6 +3227,68 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
     return true;
 }
 
+static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
+{
+    TCGv_i64 op0, op1, diff, t1, tmax;
+    TCGv_i32 t2, t3;
+    TCGv_ptr ptr;
+    unsigned desc, vsz = vec_full_reg_size(s);
+
+    if (!dc_isar_feature(aa64_sve2, s)) {
+        return false;
+    }
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    op0 = read_cpu_reg(s, a->rn, 1);
+    op1 = read_cpu_reg(s, a->rm, 1);
+
+    tmax = tcg_const_i64(vsz);
+    diff = tcg_temp_new_i64();
+
+    if (a->rw) {
+        /* WHILERW */
+        /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
+        t1 = tcg_temp_new_i64();
+        tcg_gen_sub_i64(diff, op0, op1);
+        tcg_gen_sub_i64(t1, op1, op0);
+        tcg_gen_movcond_i64(TCG_COND_LTU, diff, op0, op1, diff, t1);
+        tcg_temp_free_i64(t1);
+        /* If op1 == op0, diff == 0, and the condition is always true. */
+        tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
+    } else {
+        /* WHILEWR */
+        tcg_gen_sub_i64(diff, op1, op0);
+        /* If op0 >= op1, diff <= 0, the condition is always true. */
+        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
+    }
+
+    /* Bound to the maximum.  */
+    tcg_gen_umin_i64(diff, diff, tmax);
+    tcg_temp_free_i64(tmax);
+
+    /* Since we're bounded, pass as a 32-bit type.  */
+    t2 = tcg_temp_new_i32();
+    tcg_gen_extrl_i64_i32(t2, diff);
+    tcg_temp_free_i64(diff);
+
+    desc = (vsz / 8) - 2;
+    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
+    t3 = tcg_const_i32(desc);
+
+    ptr = tcg_temp_new_ptr();
+    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
+
+    gen_helper_sve_whilel(t2, ptr, t2, t3);
+    do_pred_flags(t2);
+
+    tcg_temp_free_ptr(ptr);
+    tcg_temp_free_i32(t2);
+    tcg_temp_free_i32(t3);
+    return true;
+}
+
 /*
  *** SVE Integer Wide Immediate - Unpredicated Group
  */
-- 
2.25.1


Re: [PATCH v3 34/81] target/arm: Implement SVE2 WHILERW, WHILEWR
Posted by LIU Zhiwei 5 years, 4 months ago

On 2020/9/19 2:37, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> v2: Fix decodetree typo
> ---
>   target/arm/sve.decode      |  3 ++
>   target/arm/translate-sve.c | 62 ++++++++++++++++++++++++++++++++++++++
>   2 files changed, 65 insertions(+)
>
> diff --git a/target/arm/sve.decode b/target/arm/sve.decode
> index b7038f9f57..19d503e2f4 100644
> --- a/target/arm/sve.decode
> +++ b/target/arm/sve.decode
> @@ -702,6 +702,9 @@ CTERM           00100101 1 sf:1 1 rm:5 001000 rn:5 ne:1 0000
>   # SVE integer compare scalar count and limit
>   WHILE           00100101 esz:2 1 rm:5 000 sf:1 u:1 lt:1 rn:5 eq:1 rd:4
>   
> +# SVE2 pointer conflict compare
> +WHILE_ptr       00100101 esz:2 1 rm:5 001 100 rn:5 rw:1 rd:4
> +
>   ### SVE Integer Wide Immediate - Unpredicated Group
>   
>   # SVE broadcast floating-point immediate (unpredicated)
> diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
> index f1bc4c63e6..d3241ce167 100644
> --- a/target/arm/translate-sve.c
> +++ b/target/arm/translate-sve.c
> @@ -3227,6 +3227,68 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
>       return true;
>   }
>   
> +static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
> +{
> +    TCGv_i64 op0, op1, diff, t1, tmax;
> +    TCGv_i32 t2, t3;
> +    TCGv_ptr ptr;
> +    unsigned desc, vsz = vec_full_reg_size(s);
> +
> +    if (!dc_isar_feature(aa64_sve2, s)) {
> +        return false;
> +    }
> +    if (!sve_access_check(s)) {
> +        return true;
> +    }
> +
> +    op0 = read_cpu_reg(s, a->rn, 1);
> +    op1 = read_cpu_reg(s, a->rm, 1);
> +
> +    tmax = tcg_const_i64(vsz);
> +    diff = tcg_temp_new_i64();
> +
> +    if (a->rw) {
> +        /* WHILERW */
> +        /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
> +        t1 = tcg_temp_new_i64();
> +        tcg_gen_sub_i64(diff, op0, op1);
> +        tcg_gen_sub_i64(t1, op1, op0);
> +        tcg_gen_movcond_i64(TCG_COND_LTU, diff, op0, op1, diff, t1);
It should be:

tcg_gen_movcond_i64(TCG_COND_GTU, diff, op0, op1, diff, t1);

> +        tcg_temp_free_i64(t1);
> +        /* If op1 == op0, diff == 0, and the condition is always true. */
> +        tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
> +    } else {
> +        /* WHILEWR */
> +        tcg_gen_sub_i64(diff, op1, op0);
> +        /* If op0 >= op1, diff <= 0, the condition is always true. */
> +        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
> +    }
> +
> +    /* Bound to the maximum.  */
> +    tcg_gen_umin_i64(diff, diff, tmax);
> +    tcg_temp_free_i64(tmax);
> +
> +    /* Since we're bounded, pass as a 32-bit type.  */
> +    t2 = tcg_temp_new_i32();
> +    tcg_gen_extrl_i64_i32(t2, diff);
We should align the count down to a multiple of (1 << esz):

tcg_gen_andi_i32(t2, t2, ~MAKE_64BIT_MASK(0, a->esz));

Best Regards,
Zhiwei
> +    tcg_temp_free_i64(diff);
> +
> +    desc = (vsz / 8) - 2;
> +    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
> +    t3 = tcg_const_i32(desc);
> +
> +    ptr = tcg_temp_new_ptr();
> +    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
> +
> +    gen_helper_sve_whilel(t2, ptr, t2, t3);
> +    do_pred_flags(t2);
> +
> +    tcg_temp_free_ptr(ptr);
> +    tcg_temp_free_i32(t2);
> +    tcg_temp_free_i32(t3);
> +    return true;
> +}
> +
>   /*
>    *** SVE Integer Wide Immediate - Unpredicated Group
>    */

Re: [PATCH v3 34/81] target/arm: Implement SVE2 WHILERW, WHILEWR
Posted by Richard Henderson 5 years, 3 months ago
On 10/12/20 7:33 PM, LIU Zhiwei wrote:
>> +    if (a->rw) {
>> +        /* WHILERW */
>> +        /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
>> +        t1 = tcg_temp_new_i64();
>> +        tcg_gen_sub_i64(diff, op0, op1);
>> +        tcg_gen_sub_i64(t1, op1, op0);
>> +        tcg_gen_movcond_i64(TCG_COND_LTU, diff, op0, op1, diff, t1);
> It should be:
> 
> tcg_gen_movcond_i64(TCG_COND_GTU, diff, op0, op1, diff, t1);

Yep.

> 
>> +        tcg_temp_free_i64(t1);
>> +        /* If op1 == op0, diff == 0, and the condition is always true. */
>> +        tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
>> +    } else {
>> +        /* WHILEWR */
>> +        tcg_gen_sub_i64(diff, op1, op0);
>> +        /* If op0 >= op1, diff <= 0, the condition is always true. */
>> +        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
>> +    }
>> +
>> +    /* Bound to the maximum.  */
>> +    tcg_gen_umin_i64(diff, diff, tmax);
>> +    tcg_temp_free_i64(tmax);
>> +
>> +    /* Since we're bounded, pass as a 32-bit type.  */
>> +    t2 = tcg_temp_new_i32();
>> +    tcg_gen_extrl_i64_i32(t2, diff);
> We should align the count down to a multiple of (1 << esz):
> 
> tcg_gen_andi_i32(t2, t2, ~MAKE_64BIT_MASK(0, a->esz));

Yep, this corresponds to the "DIV (esize DIV 8)" portion of the pseudocode.
But it needs to go earlier, before we compare diff against 0 in the two movcond
operations above.

Will fix.  Thanks,


r~