The unit operation for fmul8x16 and friends is described in the
manual as "MS16b". Split that out for clarity. Improve rounding
with an unconditional addition of 0.5 as a fixed-point integer.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/sparc/vis_helper.c | 78 ++++++++++++---------------------------
1 file changed, 24 insertions(+), 54 deletions(-)
diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c
index 14c665cad6..e15c6bb34e 100644
--- a/target/sparc/vis_helper.c
+++ b/target/sparc/vis_helper.c
@@ -44,6 +44,7 @@ target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
#if HOST_BIG_ENDIAN
#define VIS_B64(n) b[7 - (n)]
+#define VIS_SB64(n) sb[7 - (n)]
#define VIS_W64(n) w[3 - (n)]
#define VIS_SW64(n) sw[3 - (n)]
#define VIS_L64(n) l[1 - (n)]
@@ -51,6 +52,7 @@ target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
#define VIS_W32(n) w[1 - (n)]
#else
#define VIS_B64(n) b[n]
+#define VIS_SB64(n) sb[n]
#define VIS_W64(n) w[n]
#define VIS_SW64(n) sw[n]
#define VIS_L64(n) l[n]
@@ -60,6 +62,7 @@ target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
typedef union {
uint8_t b[8];
+ int8_t sb[8];
uint16_t w[4];
int16_t sw[4];
uint32_t l[2];
@@ -95,27 +98,23 @@ uint64_t helper_fpmerge(uint32_t src1, uint32_t src2)
return d.ll;
}
+static inline int do_ms16b(int x, int y)
+{
+ return ((x * y) + 0x80) >> 8;
+}
+
uint64_t helper_fmul8x16(uint32_t src1, uint64_t src2)
{
VIS64 d;
VIS32 s;
- uint32_t tmp;
s.l = src1;
d.ll = src2;
-#define PMUL(r) \
- tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B32(r); \
- if ((tmp & 0xff) > 0x7f) { \
- tmp += 0x100; \
- } \
- d.VIS_W64(r) = tmp >> 8;
-
- PMUL(0);
- PMUL(1);
- PMUL(2);
- PMUL(3);
-#undef PMUL
+ d.VIS_W64(0) = do_ms16b(s.VIS_B32(0), d.VIS_SW64(0));
+ d.VIS_W64(1) = do_ms16b(s.VIS_B32(1), d.VIS_SW64(1));
+ d.VIS_W64(2) = do_ms16b(s.VIS_B32(2), d.VIS_SW64(2));
+ d.VIS_W64(3) = do_ms16b(s.VIS_B32(3), d.VIS_SW64(3));
return d.ll;
}
@@ -124,25 +123,14 @@ uint64_t helper_fmul8x16a(uint32_t src1, int32_t src2)
{
VIS32 s;
VIS64 d;
- uint32_t tmp;
s.l = src1;
d.ll = 0;
-#define PMUL(r) \
- do { \
- tmp = src2 * (int32_t)s.VIS_B32(r); \
- if ((tmp & 0xff) > 0x7f) { \
- tmp += 0x100; \
- } \
- d.VIS_W64(r) = tmp >> 8; \
- } while (0)
-
- PMUL(0);
- PMUL(1);
- PMUL(2);
- PMUL(3);
-#undef PMUL
+ d.VIS_W64(0) = do_ms16b(s.VIS_B32(0), src2);
+ d.VIS_W64(1) = do_ms16b(s.VIS_B32(1), src2);
+ d.VIS_W64(2) = do_ms16b(s.VIS_B32(2), src2);
+ d.VIS_W64(3) = do_ms16b(s.VIS_B32(3), src2);
return d.ll;
}
@@ -150,23 +138,14 @@ uint64_t helper_fmul8x16a(uint32_t src1, int32_t src2)
uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
{
VIS64 s, d;
- uint32_t tmp;
s.ll = src1;
d.ll = src2;
-#define PMUL(r) \
- tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \
- if ((tmp & 0xff) > 0x7f) { \
- tmp += 0x100; \
- } \
- d.VIS_W64(r) = tmp >> 8;
-
- PMUL(0);
- PMUL(1);
- PMUL(2);
- PMUL(3);
-#undef PMUL
+ d.VIS_W64(0) = do_ms16b(s.VIS_SB64(1), d.VIS_SW64(0));
+ d.VIS_W64(1) = do_ms16b(s.VIS_SB64(3), d.VIS_SW64(1));
+ d.VIS_W64(2) = do_ms16b(s.VIS_SB64(5), d.VIS_SW64(2));
+ d.VIS_W64(3) = do_ms16b(s.VIS_SB64(7), d.VIS_SW64(3));
return d.ll;
}
@@ -174,23 +153,14 @@ uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
{
VIS64 s, d;
- uint32_t tmp;
s.ll = src1;
d.ll = src2;
-#define PMUL(r) \
- tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \
- if ((tmp & 0xff) > 0x7f) { \
- tmp += 0x100; \
- } \
- d.VIS_W64(r) = tmp >> 8;
-
- PMUL(0);
- PMUL(1);
- PMUL(2);
- PMUL(3);
-#undef PMUL
+ d.VIS_W64(0) = do_ms16b(s.VIS_B64(0), d.VIS_SW64(0));
+ d.VIS_W64(1) = do_ms16b(s.VIS_B64(2), d.VIS_SW64(1));
+ d.VIS_W64(2) = do_ms16b(s.VIS_B64(4), d.VIS_SW64(2));
+ d.VIS_W64(3) = do_ms16b(s.VIS_B64(6), d.VIS_SW64(3));
return d.ll;
}
--
2.34.1
On 2/5/24 18:55, Richard Henderson wrote:
> The unit operation for fmul8x16 and friends is described in the
> manual as "MS16b". Split that out for clarity. Improve rounding
> with an unconditional addition of 0.5 as a fixed-point integer.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/sparc/vis_helper.c | 78 ++++++++++++---------------------------
> 1 file changed, 24 insertions(+), 54 deletions(-)
> @@ -150,23 +138,14 @@ uint64_t helper_fmul8x16a(uint32_t src1, int32_t src2)
> uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
> {
> VIS64 s, d;
> - uint32_t tmp;
>
> s.ll = src1;
> d.ll = src2;
>
> -#define PMUL(r) \
> - tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \
> - if ((tmp & 0xff) > 0x7f) { \
> - tmp += 0x100; \
> - } \
> - d.VIS_W64(r) = tmp >> 8;
> -
> - PMUL(0);
> - PMUL(1);
> - PMUL(2);
> - PMUL(3);
> -#undef PMUL
> + d.VIS_W64(0) = do_ms16b(s.VIS_SB64(1), d.VIS_SW64(0));
s.VIS_SB64(1) = upper bit, OK.
> + d.VIS_W64(1) = do_ms16b(s.VIS_SB64(3), d.VIS_SW64(1));
> + d.VIS_W64(2) = do_ms16b(s.VIS_SB64(5), d.VIS_SW64(2));
> + d.VIS_W64(3) = do_ms16b(s.VIS_SB64(7), d.VIS_SW64(3));
>
> return d.ll;
> }
> @@ -174,23 +153,14 @@ uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
> uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
> {
> VIS64 s, d;
> - uint32_t tmp;
>
> s.ll = src1;
> d.ll = src2;
>
> -#define PMUL(r) \
> - tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \
> - if ((tmp & 0xff) > 0x7f) { \
> - tmp += 0x100; \
> - } \
> - d.VIS_W64(r) = tmp >> 8;
> -
> - PMUL(0);
> - PMUL(1);
> - PMUL(2);
> - PMUL(3);
> -#undef PMUL
> + d.VIS_W64(0) = do_ms16b(s.VIS_B64(0), d.VIS_SW64(0));
s.VIS_B64(0) for lower bit, OK.
> + d.VIS_W64(1) = do_ms16b(s.VIS_B64(2), d.VIS_SW64(1));
> + d.VIS_W64(2) = do_ms16b(s.VIS_B64(4), d.VIS_SW64(2));
> + d.VIS_W64(3) = do_ms16b(s.VIS_B64(6), d.VIS_SW64(3));
>
> return d.ll;
> }
Maybe add a comment for high/low bits in fmul8sux16/fmul8ulx16,
as it was not obvious at first. Otherwise,
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
On 3/5/24 21:11, Philippe Mathieu-Daudé wrote:
> On 2/5/24 18:55, Richard Henderson wrote:
>> The unit operation for fmul8x16 and friends is described in the
>> manual as "MS16b". Split that out for clarity. Improve rounding
>> with an unconditional addition of 0.5 as a fixed-point integer.
>>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>> target/sparc/vis_helper.c | 78 ++++++++++++---------------------------
>> 1 file changed, 24 insertions(+), 54 deletions(-)
>
>
>> @@ -150,23 +138,14 @@ uint64_t helper_fmul8x16a(uint32_t src1, int32_t
>> src2)
>> uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
>> {
>> VIS64 s, d;
>> - uint32_t tmp;
>> s.ll = src1;
>> d.ll = src2;
>> -#define
>> PMUL(r) \
>> - tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >>
>> 8); \
>> - if ((tmp & 0xff) > 0x7f)
>> { \
>> - tmp +=
>> 0x100; \
>> -
>> } \
>> - d.VIS_W64(r) = tmp >> 8;
>> -
>> - PMUL(0);
>> - PMUL(1);
>> - PMUL(2);
>> - PMUL(3);
>> -#undef PMUL
>> + d.VIS_W64(0) = do_ms16b(s.VIS_SB64(1), d.VIS_SW64(0));
>
> s.VIS_SB64(1) = upper bit, OK.
I meant "bits" (plural)!
>
>> + d.VIS_W64(1) = do_ms16b(s.VIS_SB64(3), d.VIS_SW64(1));
>> + d.VIS_W64(2) = do_ms16b(s.VIS_SB64(5), d.VIS_SW64(2));
>> + d.VIS_W64(3) = do_ms16b(s.VIS_SB64(7), d.VIS_SW64(3));
>> return d.ll;
>> }
>> @@ -174,23 +153,14 @@ uint64_t helper_fmul8sux16(uint64_t src1,
>> uint64_t src2)
>> uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
>> {
>> VIS64 s, d;
>> - uint32_t tmp;
>> s.ll = src1;
>> d.ll = src2;
>> -#define
>> PMUL(r) \
>> - tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r *
>> 2)); \
>> - if ((tmp & 0xff) > 0x7f)
>> { \
>> - tmp +=
>> 0x100; \
>> -
>> } \
>> - d.VIS_W64(r) = tmp >> 8;
>> -
>> - PMUL(0);
>> - PMUL(1);
>> - PMUL(2);
>> - PMUL(3);
>> -#undef PMUL
>> + d.VIS_W64(0) = do_ms16b(s.VIS_B64(0), d.VIS_SW64(0));
>
> s.VIS_B64(0) for lower bit, OK.
Ditto.
>
>> + d.VIS_W64(1) = do_ms16b(s.VIS_B64(2), d.VIS_SW64(1));
>> + d.VIS_W64(2) = do_ms16b(s.VIS_B64(4), d.VIS_SW64(2));
>> + d.VIS_W64(3) = do_ms16b(s.VIS_B64(6), d.VIS_SW64(3));
>> return d.ll;
>> }
>
> Maybe add a comment for high/low bits in fmul8sux16/fmul8ulx16,
> as it was not obvious at first. Otherwise,
>
> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
>
© 2016 - 2026 Red Hat, Inc.