vmulhsw: Vector Multiply High Signed Word
vmulhuw: Vector Multiply High Unsigned Word
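For each word element, both instructions compute the 64-bit product of the corresponding elements of VRA and VRB and write the high-order 32 bits of that product to VRT (signed operands for vmulhsw, unsigned for vmulhuw). A minimal per-lane reference model of the signed case, matching the helper added below; the function name is illustrative only and not part of the patch:

#include <stdint.h>

/* High 32 bits of the 64-bit signed product of two word elements;
 * vmulhuw is identical with uint32_t/uint64_t. */
static int32_t mulhsw_lane(int32_t a, int32_t b)
{
    return (int32_t)(((int64_t)a * (int64_t)b) >> 32);
}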
Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
---
target/ppc/helper.h | 2 ++
target/ppc/int_helper.c | 14 ++++++++++++++
target/ppc/translate/vmx-impl.inc.c | 6 ++++++
target/ppc/translate/vmx-ops.inc.c | 4 ++--
4 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index c3f087ccb3..6d4a3536eb 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -186,6 +186,8 @@ DEF_HELPER_3(vmulouh, void, avr, avr, avr)
DEF_HELPER_3(vmulouw, void, avr, avr, avr)
DEF_HELPER_3(vmuluwm, void, avr, avr, avr)
DEF_HELPER_3(vmulld, void, avr, avr, avr)
+DEF_HELPER_3(vmulhsw, void, avr, avr, avr)
+DEF_HELPER_3(vmulhuw, void, avr, avr, avr)
DEF_HELPER_3(vslo, void, avr, avr, avr)
DEF_HELPER_3(vsro, void, avr, avr, avr)
DEF_HELPER_3(vsrv, void, avr, avr, avr)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index afbcdd05b4..4bb3b7e928 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -523,6 +523,20 @@ void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
r->VsrD(0) = 0;
}
+#define VMULH_DO(name, op, element, cast_orig, cast_temp) \
+ void helper_vmulh##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
+ { \
+ int i; \
+ \
+ for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
+ r->element[i] = (cast_orig)(((cast_temp)a->element[i] op \
+ (cast_temp)b->element[i]) >> 32); \
+ } \
+ }
+VMULH_DO(sw, *, s32, int32_t, int64_t)
+VMULH_DO(uw, *, u32, uint32_t, uint64_t)
+#undef VMULH_DO
+
#define VARITH_DO(name, op, element) \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
{ \
diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
index 4ee1df48f2..2c35559c52 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -811,9 +811,15 @@ GEN_VXFORM(vmulld, 4, 7);
GEN_VXFORM(vmuleub, 4, 8);
GEN_VXFORM(vmuleuh, 4, 9);
GEN_VXFORM(vmuleuw, 4, 10);
+GEN_VXFORM(vmulhuw, 4, 10);
+GEN_VXFORM_DUAL(vmuleuw, PPC_ALTIVEC, PPC_NONE,
+ vmulhuw, PPC_NONE, PPC2_ISA300);
GEN_VXFORM(vmulesb, 4, 12);
GEN_VXFORM(vmulesh, 4, 13);
GEN_VXFORM(vmulesw, 4, 14);
+GEN_VXFORM(vmulhsw, 4, 14);
+GEN_VXFORM_DUAL(vmulesw, PPC_ALTIVEC, PPC_NONE,
+ vmulhsw, PPC_NONE, PPC2_ISA300);
GEN_VXFORM_V(vslb, MO_8, tcg_gen_gvec_shlv, 2, 4);
GEN_VXFORM_V(vslh, MO_16, tcg_gen_gvec_shlv, 2, 5);
GEN_VXFORM_V(vslw, MO_32, tcg_gen_gvec_shlv, 2, 6);
diff --git a/target/ppc/translate/vmx-ops.inc.c b/target/ppc/translate/vmx-ops.inc.c
index 499bed0a44..1d8238a718 100644
--- a/target/ppc/translate/vmx-ops.inc.c
+++ b/target/ppc/translate/vmx-ops.inc.c
@@ -107,10 +107,10 @@ GEN_VXFORM_207(vmulosw, 4, 6),
GEN_VXFORM_300(vmulld, 4, 7),
GEN_VXFORM(vmuleub, 4, 8),
GEN_VXFORM(vmuleuh, 4, 9),
-GEN_VXFORM_207(vmuleuw, 4, 10),
+GEN_VXFORM_DUAL(vmuleuw, vmulhuw, 4, 10, PPC_ALTIVEC, PPC_NONE),
GEN_VXFORM(vmulesb, 4, 12),
GEN_VXFORM(vmulesh, 4, 13),
-GEN_VXFORM_207(vmulesw, 4, 14),
+GEN_VXFORM_DUAL(vmulesw, vmulhsw, 4, 14, PPC_ALTIVEC, PPC_NONE),
GEN_VXFORM(vslb, 2, 4),
GEN_VXFORM(vslh, 2, 5),
GEN_VXFORM_DUAL(vslw, vrlwnm, 2, 6, PPC_ALTIVEC, PPC_NONE),
--
2.23.0
On 6/12/20 9:20 PM, Lijun Pan wrote:
> +#define VMULH_DO(name, op, element, cast_orig, cast_temp) \
> + void helper_vmulh##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
> + { \
> + int i; \
> + \
> + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
> + r->element[i] = (cast_orig)(((cast_temp)a->element[i] op \
> + (cast_temp)b->element[i]) >> 32); \
> + } \
> + }
> +VMULH_DO(sw, *, s32, int32_t, int64_t)
> +VMULH_DO(uw, *, u32, uint32_t, uint64_t)
> +#undef VMULH_DO
There's no point in calling the macro "VMUL" and then passing in "op" as a
parameter. Just inline the multiply directly.
Also, fix your indentation.
r~
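[One way to read this suggestion, keeping the macro but dropping the op argument and realigning the continuations, is sketched here; ppc_avr_t and ARRAY_SIZE are assumed from the surrounding int_helper.c context, and the follow-up below instead writes the two helpers out as plain functions.]

#define VMULH_DO(name, element, cast_orig, cast_temp)                      \
    void helper_vmulh##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)      \
    {                                                                       \
        int i;                                                              \
                                                                            \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
            r->element[i] = (cast_orig)(((cast_temp)a->element[i] *         \
                                         (cast_temp)b->element[i]) >> 32);  \
        }                                                                   \
    }
VMULH_DO(sw, s32, int32_t, int64_t)
VMULH_DO(uw, u32, uint32_t, uint64_t)
#undef VMULH_DO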
> On Jun 18, 2020, at 6:29 PM, Richard Henderson <richard.henderson@linaro.org> wrote:
>
> On 6/12/20 9:20 PM, Lijun Pan wrote:
>> +#define VMULH_DO(name, op, element, cast_orig, cast_temp) \
>> + void helper_vmulh##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
>> + { \
>> + int i; \
>> + \
>> + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
>> + r->element[i] = (cast_orig)(((cast_temp)a->element[i] op \
>> + (cast_temp)b->element[i]) >> 32); \
>> + } \
>> + }
>> +VMULH_DO(sw, *, s32, int32_t, int64_t)
>> +VMULH_DO(uw, *, u32, uint32_t, uint64_t)
>> +#undef VMULH_DO
>
> There's no point in calling the macro "VMUL" and then passing in "op" as a
> parameter. Just inline the multiply directly.
Do you mean writing two functions directly,
void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < 4; i++) {
        r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32);
    }
}

void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < 4; i++) {
        r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] * (uint64_t)b->u32[i]) >> 32);
    }
}
Thanks,
Lijun
On 6/18/20 10:37 PM, Lijun Pan wrote:
> Do you mean writing two functions directly,
>
> void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
> {
>     int i;
>
>     for (i = 0; i < 4; i++) {
>         r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32);
>     }
> }
>
> void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
> {
>     int i;
>
>     for (i = 0; i < 4; i++) {
>         r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] * (uint64_t)b->u32[i]) >> 32);
>     }
> }
That works for me.
r~