[Qemu-devel] [PATCH 3/6] target/ppc: rework vmul{e,o}{s,u}{b,h,w} instructions to use Vsr* macros

Posted by Mark Cave-Ayland 6 years, 10 months ago
The current implementations make use of the endian-specific macros HI_IDX and
LO_IDX directly to calculate array offsets.

Rework the implementation to use the Vsr* macros so that these per-endian
references can be removed.
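
For reference, the Vsr* accessors hide the host-endian index adjustment so
that element 0 is always the most significant element regardless of host byte
order; conceptually they expand to something like this (illustrative only --
see target/ppc/internal.h for the real definitions):

  #if defined(HOST_WORDS_BIGENDIAN)
  #define VsrB(i)  u8[i]
  #define VsrH(i)  u16[i]
  #else
  #define VsrB(i)  u8[15 - (i)]
  #define VsrH(i)  u16[7 - (i)]
  #endif

With that, the even-element helper generated by e.g. VMUL(ub, u8, VsrB, VsrH,
uint16_t) below comes out roughly as:

  void helper_vmuleub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
  {
      int i;

      for (i = 0; i < ARRAY_SIZE(r->u8); i += 2) {
          r->VsrH(i >> 1) = (uint16_t)a->VsrB(i) *
                            (uint16_t)b->VsrB(i);
      }
  }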

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
---
 target/ppc/int_helper.c | 48 +++++++++++++++++++++++++++---------------------
 1 file changed, 27 insertions(+), 21 deletions(-)

diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index f084a706ee..dc5c9fb8d8 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1118,33 +1118,39 @@ void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
     }
 }
 
-#define VMUL_DO(name, mul_element, prod_element, cast, evenp)           \
+#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
     void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
     {                                                                   \
         int i;                                                          \
                                                                         \
-        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
-            if (evenp) {                                                \
-                r->prod_element[i] =                                    \
-                    (cast)a->mul_element[i * 2 + HI_IDX] *              \
-                    (cast)b->mul_element[i * 2 + HI_IDX];               \
-            } else {                                                    \
-                r->prod_element[i] =                                    \
-                    (cast)a->mul_element[i * 2 + LO_IDX] *              \
-                    (cast)b->mul_element[i * 2 + LO_IDX];               \
-            }                                                           \
+        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
+            r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
+                                     (cast)b->mul_access(i);            \
+        }                                                               \
+    }
+
+#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
+    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
+    {                                                                   \
+        int i;                                                          \
+                                                                        \
+        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
+            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
+                                     (cast)b->mul_access(i + 1);        \
         }                                                               \
     }
-#define VMUL(suffix, mul_element, prod_element, cast)            \
-    VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1)    \
-    VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
-VMUL(sb, s8, s16, int16_t)
-VMUL(sh, s16, s32, int32_t)
-VMUL(sw, s32, s64, int64_t)
-VMUL(ub, u8, u16, uint16_t)
-VMUL(uh, u16, u32, uint32_t)
-VMUL(uw, u32, u64, uint64_t)
-#undef VMUL_DO
+
+#define VMUL(suffix, mul_element, mul_access, prod_access, cast)       \
+    VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast)  \
+    VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
+VMUL(sb, s8, VsrSB, VsrSH, int16_t)
+VMUL(sh, s16, VsrSH, VsrSW, int32_t)
+VMUL(sw, s32, VsrSW, VsrSD, int64_t)
+VMUL(ub, u8, VsrB, VsrH, uint16_t)
+VMUL(uh, u16, VsrH, VsrW, uint32_t)
+VMUL(uw, u32, VsrW, VsrD, uint64_t)
+#undef VMUL_DO_EVN
+#undef VMUL_DO_ODD
 #undef VMUL
 
 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
-- 
2.11.0


Re: [Qemu-devel] [PATCH 3/6] target/ppc: rework vmul{e,o}{s,u}{b,h,w} instructions to use Vsr* macros
Posted by Richard Henderson 6 years, 10 months ago
On 12/23/18 10:38 PM, Mark Cave-Ayland wrote:
> -#define VMUL_DO(name, mul_element, prod_element, cast, evenp)           \
> +#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
>      void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
>      {                                                                   \
>          int i;                                                          \
>                                                                          \
> +        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
> +            r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
> +                                     (cast)b->mul_access(i);            \
> +        }                                                               \
> +    }
> +
> +#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
> +    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
> +    {                                                                   \
> +        int i;                                                          \
> +                                                                        \
> +        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
> +            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
> +                                     (cast)b->mul_access(i + 1);        \
>          }                                                               \
>      }

FWIW,

  for (i = odd; i < ARRAY_SIZE; i += 2) {
    r->pacc(i >> 1) = (cast)a->macc(i) * b->macc(i);
  }

is sufficient to unify these two.  But what you have isn't wrong.
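
Folded back into the macro form used in the patch, that unified version would
look roughly like this (untested sketch, not part of the posted series):

  #define VMUL_DO(name, mul_element, mul_access, prod_access, cast, odd)  \
      void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
      {                                                                   \
          int i;                                                          \
                                                                          \
          /* odd selects between the even and odd source elements */      \
          for (i = odd; i < ARRAY_SIZE(r->mul_element); i += 2) {         \
              r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
                                       (cast)b->mul_access(i);            \
          }                                                               \
      }

  #define VMUL(suffix, mul_element, mul_access, prod_access, cast)         \
      VMUL_DO(mule##suffix, mul_element, mul_access, prod_access, cast, 0) \
      VMUL_DO(mulo##suffix, mul_element, mul_access, prod_access, cast, 1)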

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

Re: [Qemu-devel] [PATCH 3/6] target/ppc: rework vmul{e,o}{s,u}{b,h,w} instructions to use Vsr* macros
Posted by Mark Cave-Ayland 6 years, 10 months ago
On 25/12/2018 20:11, Richard Henderson wrote:

> On 12/23/18 10:38 PM, Mark Cave-Ayland wrote:
>> -#define VMUL_DO(name, mul_element, prod_element, cast, evenp)           \
>> +#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
>>      void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
>>      {                                                                   \
>>          int i;                                                          \
>>                                                                          \
>> +        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
>> +            r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
>> +                                     (cast)b->mul_access(i);            \
>> +        }                                                               \
>> +    }
>> +
>> +#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
>> +    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
>> +    {                                                                   \
>> +        int i;                                                          \
>> +                                                                        \
>> +        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
>> +            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
>> +                                     (cast)b->mul_access(i + 1);        \
>>          }                                                               \
>>      }
> 
> FWIW,
> 
>   for (i = odd; i < ARRAY_SIZE; i += 2) {
>     r->pacc(i >> 1) = (cast)a->macc(i) * b->macc(i);
>   }
> 
> is sufficient to unify these two.  But what you have isn't wrong.
> 
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

Ah, indeed that's quite neat! Having thought about it for a few days, I've decided to
leave it as-is for now: it was useful to be able to test the odd/even variants
separately (since they were often used together in testing), and it's just that tiny
bit easier to relate to the ISA documentation.


ATB,

Mark.