[Qemu-devel] [PULL 19/54] s390x/tcg: Implement VECTOR GALOIS FIELD MULTIPLY SUM (AND ACCUMULATE)

Cornelia Huck posted 54 patches 6 years, 5 months ago
Maintainers: Cornelia Huck <cohuck@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>, "Michael S. Tsirkin" <mst@redhat.com>, Christian Borntraeger <borntraeger@de.ibm.com>, Halil Pasic <pasic@linux.ibm.com>, David Hildenbrand <david@redhat.com>, Richard Henderson <rth@twiddle.net>, Thomas Huth <thuth@redhat.com>
There is a newer version of this series
[Qemu-devel] [PULL 19/54] s390x/tcg: Implement VECTOR GALOIS FIELD MULTIPLY SUM (AND ACCUMULATE)
Posted by Cornelia Huck 6 years, 5 months ago
From: David Hildenbrand <david@redhat.com>

A galois field multiplication in field 2 is like binary multiplication,
however instead of doing ordinary binary additions, xor's are performed.
So no carries are considered.

Implement all variants via helpers. s390_vec_sar() and s390_vec_shr()
will be reused later on.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |   8 ++
 target/s390x/insn-data.def      |   4 +
 target/s390x/translate_vx.inc.c |  38 ++++++++
 target/s390x/vec_int_helper.c   | 167 ++++++++++++++++++++++++++++++++
 4 files changed, 217 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 60b8bd3c431a..6e6ba9bf32a2 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -154,6 +154,14 @@ DEF_HELPER_FLAGS_3(gvec_vclz8, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
 DEF_HELPER_FLAGS_3(gvec_vclz16, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
 DEF_HELPER_FLAGS_3(gvec_vctz8, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
 DEF_HELPER_FLAGS_3(gvec_vctz16, TCG_CALL_NO_RWG, void, ptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vgfm8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vgfm16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vgfm32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vgfm64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vgfma8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vgfma16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vgfma32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vgfma64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index b8400c191a7f..add174b79381 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1090,6 +1090,10 @@
     F(0xe752, VCTZ,    VRR_a, V,   0, 0, 0, 0, vctz, 0, IF_VEC)
 /* VECTOR EXCLUSIVE OR */
     F(0xe76d, VX,      VRR_c, V,   0, 0, 0, 0, vx, 0, IF_VEC)
+/* VECTOR GALOIS FIELD MULTIPLY SUM */
+    F(0xe7b4, VGFM,    VRR_c, V,   0, 0, 0, 0, vgfm, 0, IF_VEC)
+/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */
+    F(0xe7bc, VGFMA,   VRR_d, V,   0, 0, 0, 0, vgfma, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 0935857eff09..1db5d6d152c6 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -1483,3 +1483,41 @@ static DisasJumpType op_vx(DisasContext *s, DisasOps *o)
                  get_field(s->fields, v3));
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vgfm(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s->fields, m4);
+    static const GVecGen3 g[4] = {
+        { .fno = gen_helper_gvec_vgfm8, },
+        { .fno = gen_helper_gvec_vgfm16, },
+        { .fno = gen_helper_gvec_vgfm32, },
+        { .fno = gen_helper_gvec_vgfm64, },
+    };
+
+    if (es > ES_64) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+    gen_gvec_3(get_field(s->fields, v1), get_field(s->fields, v2),
+               get_field(s->fields, v3), &g[es]);
+    return DISAS_NEXT;
+}
+
+static DisasJumpType op_vgfma(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s->fields, m5);
+    static const GVecGen4 g[4] = {
+        { .fno = gen_helper_gvec_vgfma8, },
+        { .fno = gen_helper_gvec_vgfma16, },
+        { .fno = gen_helper_gvec_vgfma32, },
+        { .fno = gen_helper_gvec_vgfma64, },
+    };
+
+    if (es > ES_64) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+    gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2),
+               get_field(s->fields, v3), get_field(s->fields, v4), &g[es]);
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec_int_helper.c b/target/s390x/vec_int_helper.c
index d1b1f2850990..20a1034dd83e 100644
--- a/target/s390x/vec_int_helper.c
+++ b/target/s390x/vec_int_helper.c
@@ -15,6 +15,59 @@
 #include "vec.h"
 #include "exec/helper-proto.h"
 
+static bool s390_vec_is_zero(const S390Vector *v)
+{
+    return !v->doubleword[0] && !v->doubleword[1];
+}
+
+static void s390_vec_xor(S390Vector *res, const S390Vector *a,
+                         const S390Vector *b)
+{
+    res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0];
+    res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1];
+}
+
+static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count)
+{
+    uint64_t tmp;
+
+    g_assert(count < 128);
+    if (count == 0) {
+        d->doubleword[0] = a->doubleword[0];
+        d->doubleword[1] = a->doubleword[1];
+    } else if (count == 64) {
+        d->doubleword[0] = a->doubleword[1];
+        d->doubleword[1] = 0;
+    } else if (count < 64) {
+        tmp = extract64(a->doubleword[1], 64 - count, count);
+        d->doubleword[1] = a->doubleword[1] << count;
+        d->doubleword[0] = (a->doubleword[0] << count) | tmp;
+    } else {
+        d->doubleword[0] = a->doubleword[1] << (count - 64);
+        d->doubleword[1] = 0;
+    }
+}
+
+static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count)
+{
+    uint64_t tmp;
+
+    g_assert(count < 128);
+    if (count == 0) {
+        d->doubleword[0] = a->doubleword[0];
+        d->doubleword[1] = a->doubleword[1];
+    } else if (count == 64) {
+        d->doubleword[1] = a->doubleword[0];
+        d->doubleword[0] = 0;
+    } else if (count < 64) {
+        tmp = a->doubleword[1] >> count;
+        d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
+        d->doubleword[0] = a->doubleword[0] >> count;
+    } else {
+        d->doubleword[1] = a->doubleword[0] >> (count - 64);
+        d->doubleword[0] = 0;
+    }
+}
 #define DEF_VAVG(BITS)                                                         \
 void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3,         \
                              uint32_t desc)                                    \
@@ -74,3 +127,117 @@ void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc)          \
 }
 DEF_VCTZ(8)
 DEF_VCTZ(16)
+
+/* like binary multiplication, but XOR instead of addition */
+#define DEF_GALOIS_MULTIPLY(BITS, TBITS)                                       \
+static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a,                \
+                                             uint##TBITS##_t b)                \
+{                                                                              \
+    uint##TBITS##_t res = 0;                                                   \
+                                                                               \
+    while (b) {                                                                \
+        if (b & 0x1) {                                                         \
+            res = res ^ a;                                                     \
+        }                                                                      \
+        a = a << 1;                                                            \
+        b = b >> 1;                                                            \
+    }                                                                          \
+    return res;                                                                \
+}
+DEF_GALOIS_MULTIPLY(8, 16)
+DEF_GALOIS_MULTIPLY(16, 32)
+DEF_GALOIS_MULTIPLY(32, 64)
+
+static S390Vector galois_multiply64(uint64_t a, uint64_t b)
+{
+    S390Vector res = {};
+    S390Vector va = {
+        .doubleword[1] = a,
+    };
+    S390Vector vb = {
+        .doubleword[1] = b,
+    };
+
+    while (!s390_vec_is_zero(&vb)) {
+        if (vb.doubleword[1] & 0x1) {
+            s390_vec_xor(&res, &res, &va);
+        }
+        s390_vec_shl(&va, &va, 1);
+        s390_vec_shr(&vb, &vb, 1);
+    }
+    return res;
+}
+
+#define DEF_VGFM(BITS, TBITS)                                                  \
+void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3,         \
+                             uint32_t desc)                                    \
+{                                                                              \
+    int i;                                                                     \
+                                                                               \
+    for (i = 0; i < (128 / TBITS); i++) {                                      \
+        uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
+        uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
+        uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
+                                                                               \
+        a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
+        b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
+        d = d ^ galois_multiply32(a, b);                                       \
+        s390_vec_write_element##TBITS(v1, i, d);                               \
+    }                                                                          \
+}
+DEF_VGFM(8, 16)
+DEF_VGFM(16, 32)
+DEF_VGFM(32, 64)
+
+void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
+                         uint32_t desc)
+{
+    S390Vector tmp1, tmp2;
+    uint64_t a, b;
+
+    a = s390_vec_read_element64(v2, 0);
+    b = s390_vec_read_element64(v3, 0);
+    tmp1 = galois_multiply64(a, b);
+    a = s390_vec_read_element64(v2, 1);
+    b = s390_vec_read_element64(v3, 1);
+    tmp2 = galois_multiply64(a, b);
+    s390_vec_xor(v1, &tmp1, &tmp2);
+}
+
+#define DEF_VGFMA(BITS, TBITS)                                                 \
+void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3,        \
+                              const void *v4, uint32_t desc)                   \
+{                                                                              \
+    int i;                                                                     \
+                                                                               \
+    for (i = 0; i < (128 / TBITS); i++) {                                      \
+        uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
+        uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
+        uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
+                                                                               \
+        a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
+        b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
+        d = d ^ galois_multiply32(a, b);                                       \
+        d = d ^ s390_vec_read_element##TBITS(v4, i);                           \
+        s390_vec_write_element##TBITS(v1, i, d);                               \
+    }                                                                          \
+}
+DEF_VGFMA(8, 16)
+DEF_VGFMA(16, 32)
+DEF_VGFMA(32, 64)
+
+void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
+                          const void *v4, uint32_t desc)
+{
+    S390Vector tmp1, tmp2;
+    uint64_t a, b;
+
+    a = s390_vec_read_element64(v2, 0);
+    b = s390_vec_read_element64(v3, 0);
+    tmp1 = galois_multiply64(a, b);
+    a = s390_vec_read_element64(v2, 1);
+    b = s390_vec_read_element64(v3, 1);
+    tmp2 = galois_multiply64(a, b);
+    s390_vec_xor(&tmp1, &tmp1, &tmp2);
+    s390_vec_xor(v1, &tmp1, v4);
+}
-- 
2.20.1


Re: [Qemu-devel] [PULL 19/54] s390x/tcg: Implement VECTOR GALOIS FIELD MULTIPLY SUM (AND ACCUMULATE)
Posted by Peter Maydell 6 years, 5 months ago
On Mon, 20 May 2019 at 18:06, Cornelia Huck <cohuck@redhat.com> wrote:
>
> From: David Hildenbrand <david@redhat.com>
>
> A galois field multiplication in field 2 is like binary multiplication,
> however instead of doing ordinary binary additions, xor's are performed.
> So no carries are considered.
>
> Implement all variants via helpers. s390_vec_sar() and s390_vec_shr()
> will be reused later on.
>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> Signed-off-by: David Hildenbrand <david@redhat.com>

Hi -- Coverity (CID 1401703) complains that a lot of this
function is dead code:

> +static S390Vector galois_multiply64(uint64_t a, uint64_t b)
> +{
> +    S390Vector res = {};
> +    S390Vector va = {
> +        .doubleword[1] = a,
> +    };
> +    S390Vector vb = {
> +        .doubleword[1] = b,
> +    };
> +
> +    while (!s390_vec_is_zero(&vb)) {
> +        if (vb.doubleword[1] & 0x1) {
> +            s390_vec_xor(&res, &res, &va);
> +        }
> +        s390_vec_shl(&va, &va, 1);
> +        s390_vec_shr(&vb, &vb, 1);
> +    }
> +    return res;
> +}

but I can't make any sense of its annotations or why it
thinks this is true. Would somebody like to have a look at the
issue? If it's just Coverity getting confused we can mark it
as a false positive.

thanks
-- PMM

Re: [Qemu-devel] [PULL 19/54] s390x/tcg: Implement VECTOR GALOIS FIELD MULTIPLY SUM (AND ACCUMULATE)
Posted by David Hildenbrand 6 years, 5 months ago
On 30.05.19 13:22, Peter Maydell wrote:
> On Mon, 20 May 2019 at 18:06, Cornelia Huck <cohuck@redhat.com> wrote:
>>
>> From: David Hildenbrand <david@redhat.com>
>>
>> A galois field multiplication in field 2 is like binary multiplication,
>> however instead of doing ordinary binary additions, xor's are performed.
>> So no carries are considered.
>>
>> Implement all variants via helpers. s390_vec_sar() and s390_vec_shr()
>> will be reused later on.
>>
>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
> 
> Hi -- Coverity (CID 1401703) complains that a lot of this
> function is dead code:
> 
>> +static S390Vector galois_multiply64(uint64_t a, uint64_t b)
>> +{
>> +    S390Vector res = {};
>> +    S390Vector va = {
>> +        .doubleword[1] = a,
>> +    };
>> +    S390Vector vb = {
>> +        .doubleword[1] = b,
>> +    };
>> +
>> +    while (!s390_vec_is_zero(&vb)) {
>> +        if (vb.doubleword[1] & 0x1) {
>> +            s390_vec_xor(&res, &res, &va);
>> +        }
>> +        s390_vec_shl(&va, &va, 1);
>> +        s390_vec_shr(&vb, &vb, 1);
>> +    }
>> +    return res;
>> +}
> 
> but I can't make any sense of its annotations or why it
> thinks this is true. Would somebody like to have a look at the
> issue? If it's just Coverity getting confused we can mark it
> as a false positive.

I can't make absolutely any sense of the coverity gibberish as well.

The only think is that "vb->doubleword[0]" will always be 0, but that's
not what coverity is complaining about.

Marking it as false positive, thanks!

> 
> thanks
> -- PMM
> 


-- 

Thanks,

David / dhildenb

Re: [Qemu-devel] [PULL 19/54] s390x/tcg: Implement VECTOR GALOIS FIELD MULTIPLY SUM (AND ACCUMULATE)
Posted by Richard Henderson 6 years, 5 months ago
On 5/31/19 6:32 AM, David Hildenbrand wrote:
> On 30.05.19 13:22, Peter Maydell wrote:
>> Hi -- Coverity (CID 1401703) complains that a lot of this
>> function is dead code:
>>
>>> +static S390Vector galois_multiply64(uint64_t a, uint64_t b)
>>> +{
>>> +    S390Vector res = {};
>>> +    S390Vector va = {
>>> +        .doubleword[1] = a,
>>> +    };
...
>> but I can't make any sense of its annotations or why it
>> thinks this is true. Would somebody like to have a look at the
>> issue? If it's just Coverity getting confused we can mark it
>> as a false positive.
> 
> I can't make absolutely any sense of the coverity gibberish as well.
> 
> The only think is that "vb->doubleword[0]" will always be 0, but that's
> not what coverity is complaining about.
> 
> Marking it as false positive, thanks!

It does seem to be gibberish.  How in the world does it believe that the
dimensionality of S390Vector is variable.

However, because of where the two errors are placed, I can only imagine that
Coverity is confused by the syntax of the initialization.

If it were easier to run and get results, I'd try making the assignment as a
separate statement, instead of the init syntax.  But it's probably not worth
the churn.


r~

Re: [Qemu-devel] [PULL 19/54] s390x/tcg: Implement VECTOR GALOIS FIELD MULTIPLY SUM (AND ACCUMULATE)
Posted by David Hildenbrand 6 years, 5 months ago
On 30.05.19 13:22, Peter Maydell wrote:
> On Mon, 20 May 2019 at 18:06, Cornelia Huck <cohuck@redhat.com> wrote:
>>
>> From: David Hildenbrand <david@redhat.com>
>>
>> A galois field multiplication in field 2 is like binary multiplication,
>> however instead of doing ordinary binary additions, xor's are performed.
>> So no carries are considered.
>>
>> Implement all variants via helpers. s390_vec_sar() and s390_vec_shr()
>> will be reused later on.
>>
>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
> 
> Hi -- Coverity (CID 1401703) complains that a lot of this
> function is dead code:
> 
>> +static S390Vector galois_multiply64(uint64_t a, uint64_t b)
>> +{
>> +    S390Vector res = {};
>> +    S390Vector va = {
>> +        .doubleword[1] = a,
>> +    };
>> +    S390Vector vb = {
>> +        .doubleword[1] = b,
>> +    };
>> +
>> +    while (!s390_vec_is_zero(&vb)) {
>> +        if (vb.doubleword[1] & 0x1) {
>> +            s390_vec_xor(&res, &res, &va);
>> +        }
>> +        s390_vec_shl(&va, &va, 1);
>> +        s390_vec_shr(&vb, &vb, 1);
>> +    }
>> +    return res;
>> +}
> 
> but I can't make any sense of its annotations or why it
> thinks this is true. Would somebody like to have a look at the
> issue? If it's just Coverity getting confused we can mark it
> as a false positive.

I am pretty sure it is a false positive. I'll have to request access to
coverity first to have a look at the details. Thanks!

> 
> thanks
> -- PMM
> 


-- 

Thanks,

David / dhildenb