[Qemu-devel] [PATCH v2 13/32] s390x/tcg: Implement VECTOR LOAD MULTIPLE

David Hildenbrand posted 32 patches 6 years, 7 months ago
Maintainers: Richard Henderson <rth@twiddle.net>, Cornelia Huck <cohuck@redhat.com>, David Hildenbrand <david@redhat.com>
There is a newer version of this series
[Qemu-devel] [PATCH v2 13/32] s390x/tcg: Implement VECTOR LOAD MULTIPLE
Posted by David Hildenbrand 6 years, 7 months ago
Try to load the last element first. Access to the first element will
be checked afterwards. This way, we can guarantee that the vector is
not modified before we checked for all possible exceptions. (16 vectors
cannot cross more than two pages)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/insn-data.def      |  2 ++
 target/s390x/translate_vx.inc.c | 34 +++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 2b36205c84..fa0f3a9003 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1000,6 +1000,8 @@
     F(0xe721, VLGV,    VRS_c, V,   la2, 0, r1, 0, vlgv, 0, IF_VEC)
 /* VECTOR LOAD LOGICAL ELEMENT AND ZERO */
     F(0xe704, VLLEZ,   VRX,   V,   la2, 0, 0, 0, vllez, 0, IF_VEC)
+/* VECTOR LOAD MULTIPLE */
+    F(0xe736, VLM,     VRS_a, V,   la2, 0, 0, 0, vlm, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index cdad2a52f0..93f9c0f804 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -406,3 +406,37 @@ static DisasJumpType op_vllez(DisasContext *s, DisasOps *o)
     tcg_temp_free_i64(t);
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vlm(DisasContext *s, DisasOps *o)
+{
+    const uint8_t v3 = get_field(s->fields, v3);
+    uint8_t v1 = get_field(s->fields, v1);
+    TCGv_i64 t;
+
+    while (v3 < v1 || (v3 - v1 + 1) > 16) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    /*
+     * Check for possible access exceptions by trying to load the last
+     * element. The first element will be checked first next.
+     */
+    t = tcg_temp_new_i64();
+    gen_addi_and_wrap_i64(s, t, o->addr1, (v3 - v1) * 16 + 8);
+    tcg_gen_qemu_ld_i64(t, t, get_mem_index(s), MO_TEQ);
+
+    for (;; v1++) {
+        tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TEQ);
+        write_vec_element_i64(t, v1, 0, ES_64);
+        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
+        tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TEQ);
+        write_vec_element_i64(t, v1, 1, ES_64);
+        if (v1 == v3) {
+            break;
+        }
+        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
+    }
+    tcg_temp_free_i64(t);
+    return DISAS_NEXT;
+}
-- 
2.17.2


Re: [Qemu-devel] [PATCH v2 13/32] s390x/tcg: Implement VECTOR LOAD MULTIPLE
Posted by Richard Henderson 6 years, 7 months ago
On 3/1/19 3:53 AM, David Hildenbrand wrote:
> +    /*
> +     * Check for possible access exceptions by trying to load the last
> +     * element. The first element will be checked first next.
> +     */
> +    t = tcg_temp_new_i64();
> +    gen_addi_and_wrap_i64(s, t, o->addr1, (v3 - v1) * 16 + 8);
> +    tcg_gen_qemu_ld_i64(t, t, get_mem_index(s), MO_TEQ);

qemu_ld expands to enough code that it is a shame to discard this value and
reload it during this loop.  Perhaps load this to t2...

> +
> +    for (;; v1++) {
> +        tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TEQ);
> +        write_vec_element_i64(t, v1, 0, ES_64);

Move v1 == v3 break here...

> +        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
> +        tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TEQ);
> +        write_vec_element_i64(t, v1, 1, ES_64);
> +        if (v1 == v3) {
> +            break;
> +        }
> +        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
> +    }

... and store t2 into v3 element 1 after the loop.


r~

Re: [Qemu-devel] [PATCH v2 13/32] s390x/tcg: Implement VECTOR LOAD MULTIPLE
Posted by David Hildenbrand 6 years, 7 months ago
On 01.03.19 17:26, Richard Henderson wrote:
> On 3/1/19 3:53 AM, David Hildenbrand wrote:
>> +    /*
>> +     * Check for possible access exceptions by trying to load the last
>> +     * element. The first element will be checked first next.
>> +     */
>> +    t = tcg_temp_new_i64();
>> +    gen_addi_and_wrap_i64(s, t, o->addr1, (v3 - v1) * 16 + 8);
>> +    tcg_gen_qemu_ld_i64(t, t, get_mem_index(s), MO_TEQ);
> 
> qemu_ld expands to enough code that it is a shame to discard this value and
> reload it during this loop.  Perhaps load this to t2...
> 
>> +
>> +    for (;; v1++) {
>> +        tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TEQ);
>> +        write_vec_element_i64(t, v1, 0, ES_64);
> 
> Move v1 == v3 break here...
> 
>> +        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
>> +        tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TEQ);
>> +        write_vec_element_i64(t, v1, 1, ES_64);
>> +        if (v1 == v3) {
>> +            break;
>> +        }
>> +        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
>> +    }
> 
> ... and store t2 into v3 element 1 after the loop.

Yes, makes perfect sense, thanks!


-- 

Thanks,

David / dhildenb

Re: [Qemu-devel] [PATCH v2 13/32] s390x/tcg: Implement VECTOR LOAD MULTIPLE
Posted by David Hildenbrand 6 years, 7 months ago
On 01.03.19 12:53, David Hildenbrand wrote:
> Try to load the last element first. Access to the first element will
> be checked afterwards. This way, we can guarantee that the vector is
> not modified before we checked for all possible exceptions. (16 vectors
> cannot cross more than two pages)
> 
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>  target/s390x/insn-data.def      |  2 ++
>  target/s390x/translate_vx.inc.c | 34 +++++++++++++++++++++++++++++++++
>  2 files changed, 36 insertions(+)
> 
> diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
> index 2b36205c84..fa0f3a9003 100644
> --- a/target/s390x/insn-data.def
> +++ b/target/s390x/insn-data.def
> @@ -1000,6 +1000,8 @@
>      F(0xe721, VLGV,    VRS_c, V,   la2, 0, r1, 0, vlgv, 0, IF_VEC)
>  /* VECTOR LOAD LOGICAL ELEMENT AND ZERO */
>      F(0xe704, VLLEZ,   VRX,   V,   la2, 0, 0, 0, vllez, 0, IF_VEC)
> +/* VECTOR LOAD MULTIPLE */
> +    F(0xe736, VLM,     VRS_a, V,   la2, 0, 0, 0, vlm, 0, IF_VEC)
>  
>  #ifndef CONFIG_USER_ONLY
>  /* COMPARE AND SWAP AND PURGE */
> diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
> index cdad2a52f0..93f9c0f804 100644
> --- a/target/s390x/translate_vx.inc.c
> +++ b/target/s390x/translate_vx.inc.c
> @@ -406,3 +406,37 @@ static DisasJumpType op_vllez(DisasContext *s, DisasOps *o)
>      tcg_temp_free_i64(t);
>      return DISAS_NEXT;
>  }
> +
> +static DisasJumpType op_vlm(DisasContext *s, DisasOps *o)
> +{
> +    const uint8_t v3 = get_field(s->fields, v3);
> +    uint8_t v1 = get_field(s->fields, v1);
> +    TCGv_i64 t;
> +
> +    while (v3 < v1 || (v3 - v1 + 1) > 16) {

while -> if, funny that this works

> +        gen_program_exception(s, PGM_SPECIFICATION);
> +        return DISAS_NORETURN;
> +    }
> +
> +    /*
> +     * Check for possible access exceptions by trying to load the last
> +     * element. The first element will be checked first next.
> +     */
> +    t = tcg_temp_new_i64();
> +    gen_addi_and_wrap_i64(s, t, o->addr1, (v3 - v1) * 16 + 8);
> +    tcg_gen_qemu_ld_i64(t, t, get_mem_index(s), MO_TEQ);
> +
> +    for (;; v1++) {
> +        tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TEQ);
> +        write_vec_element_i64(t, v1, 0, ES_64);
> +        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
> +        tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TEQ);
> +        write_vec_element_i64(t, v1, 1, ES_64);
> +        if (v1 == v3) {
> +            break;
> +        }
> +        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
> +    }
> +    tcg_temp_free_i64(t);
> +    return DISAS_NEXT;
> +}
> 


-- 

Thanks,

David / dhildenb