The current implementations make use of the endian-specific macros MRGLO/MRGHI
and also reference HI_IDX and LO_IDX directly to calculate array offsets.
Rework the implementation to use the Vsr* macros so that these per-endian
references can be removed.
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
---
target/ppc/int_helper.c | 38 +++++++++++---------------------------
1 file changed, 11 insertions(+), 27 deletions(-)
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 598731d47a..59324b2d13 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -976,43 +976,27 @@ void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
}
}
-#define VMRG_DO(name, element, highp) \
+#define VMRG_DO(name, element, access, ofs) \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
{ \
ppc_avr_t result; \
int i; \
- size_t n_elems = ARRAY_SIZE(r->element); \
\
- for (i = 0; i < n_elems / 2; i++) { \
- if (highp) { \
- result.element[i*2+HI_IDX] = a->element[i]; \
- result.element[i*2+LO_IDX] = b->element[i]; \
- } else { \
- result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
- b->element[n_elems - i - 1]; \
- result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
- a->element[n_elems - i - 1]; \
- } \
+ for (i = 0; i < ARRAY_SIZE(r->element) / 2; i++) { \
+ result.access(i * 2 + 0) = a->access(i + ofs); \
+ result.access(i * 2 + 1) = b->access(i + ofs); \
} \
*r = result; \
}
-#if defined(HOST_WORDS_BIGENDIAN)
-#define MRGHI 0
-#define MRGLO 1
-#else
-#define MRGHI 1
-#define MRGLO 0
-#endif
-#define VMRG(suffix, element) \
- VMRG_DO(mrgl##suffix, element, MRGHI) \
- VMRG_DO(mrgh##suffix, element, MRGLO)
-VMRG(b, u8)
-VMRG(h, u16)
-VMRG(w, u32)
+
+#define VMRG(suffix, element, access, half) \
+ VMRG_DO(mrgl##suffix, element, access, half) \
+ VMRG_DO(mrgh##suffix, element, access, 0)
+VMRG(b, u8, VsrB, 8)
+VMRG(h, u16, VsrH, 4)
+VMRG(w, u32, VsrW, 2)
#undef VMRG_DO
#undef VMRG
-#undef MRGHI
-#undef MRGLO
void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
ppc_avr_t *b, ppc_avr_t *c)
--
2.11.0
On 1/29/19 11:17 AM, Mark Cave-Ayland wrote:
> +#define VMRG_DO(name, element, access, ofs) \
> void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
> { \
> ppc_avr_t result; \
> int i; \
> \
> + for (i = 0; i < ARRAY_SIZE(r->element) / 2; i++) { \
> + result.access(i * 2 + 0) = a->access(i + ofs); \
> + result.access(i * 2 + 1) = b->access(i + ofs); \
> } \
> *r = result; \
> }
> +
> +#define VMRG(suffix, element, access, half) \
> + VMRG_DO(mrgl##suffix, element, access, half) \
> + VMRG_DO(mrgh##suffix, element, access, 0)
> +VMRG(b, u8, VsrB, 8)
> +VMRG(h, u16, VsrH, 4)
> +VMRG(w, u32, VsrW, 2)
So... the point of "half" was to not replicate knowledge out to VMRG.
Just use
int i, half = ARRAY_SIZE(r->element) / 2;
for (i = 0; i < half; i++) {
within VMRG_DO.
r~
On 29/01/2019 23:05, Richard Henderson wrote:
> On 1/29/19 11:17 AM, Mark Cave-Ayland wrote:
>> +#define VMRG_DO(name, element, access, ofs) \
>> void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
>> { \
>> ppc_avr_t result; \
>> int i; \
>> \
>> + for (i = 0; i < ARRAY_SIZE(r->element) / 2; i++) { \
>> + result.access(i * 2 + 0) = a->access(i + ofs); \
>> + result.access(i * 2 + 1) = b->access(i + ofs); \
>> } \
>> *r = result; \
>> }
>> +
>> +#define VMRG(suffix, element, access, half) \
>> + VMRG_DO(mrgl##suffix, element, access, half) \
>> + VMRG_DO(mrgh##suffix, element, access, 0)
>> +VMRG(b, u8, VsrB, 8)
>> +VMRG(h, u16, VsrH, 4)
>> +VMRG(w, u32, VsrW, 2)
>
> So... the point of "half" was to not replicate knowledge out to VMRG.
> Just use
>
> int i, half = ARRAY_SIZE(r->element) / 2;
> for (i = 0; i < half; i++) {
>
> within VMRG_DO.
Okay - I was a bit confused because in your example macro signature you added ofs
which made me think you wanted its value to be determined outside, but never mind.
What about the following instead? With high set as part of the macro then the initial
assignment to ofs should be inlined accordingly at compile time:
#define VMRG_DO(name, element, access, high) \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
{ \
ppc_avr_t result; \
int ofs, half = ARRAY_SIZE(r->element) / 2; \
int i; \
\
if (high) { \
ofs = 0; \
} else { \
ofs = half; \
} \
\
for (i = 0; i < half; i++) { \
result.access(i * 2 + 0) = a->access(i + ofs); \
result.access(i * 2 + 1) = b->access(i + ofs); \
} \
*r = result; \
}
#define VMRG(suffix, element, access) \
VMRG_DO(mrgl##suffix, element, access, 0) \
VMRG_DO(mrgh##suffix, element, access, 1)
VMRG(b, u8, VsrB)
VMRG(h, u16, VsrH)
VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG
#undef VMRG
ATB,
Mark.
On Wed, 30 Jan 2019, Mark Cave-Ayland wrote:
> On 29/01/2019 23:05, Richard Henderson wrote:
>
>> On 1/29/19 11:17 AM, Mark Cave-Ayland wrote:
>>> +#define VMRG_DO(name, element, access, ofs) \
>>> void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
>>> { \
>>> ppc_avr_t result; \
>>> int i; \
>>> \
>>> + for (i = 0; i < ARRAY_SIZE(r->element) / 2; i++) { \
>>> + result.access(i * 2 + 0) = a->access(i + ofs); \
>>> + result.access(i * 2 + 1) = b->access(i + ofs); \
>>> } \
>>> *r = result; \
>>> }
>>> +
>>> +#define VMRG(suffix, element, access, half) \
>>> + VMRG_DO(mrgl##suffix, element, access, half) \
>>> + VMRG_DO(mrgh##suffix, element, access, 0)
>>> +VMRG(b, u8, VsrB, 8)
>>> +VMRG(h, u16, VsrH, 4)
>>> +VMRG(w, u32, VsrW, 2)
>>
>> So... the point of "half" was to not replicate knowledge out to VMRG.
>> Just use
>>
>> int i, half = ARRAY_SIZE(r->element) / 2;
>> for (i = 0; i < half; i++) {
>>
>> within VMRG_DO.
>
> Okay - I was a bit confused because in your example macro signature you added ofs
> which made me think you wanted its value to be determined outside, but never mind.
>
> What about the following instead? With high set as part of the macro then the initial
> assignment to ofs should be inlined accordingly at compile time:
>
>
> #define VMRG_DO(name, element, access, high) \
> void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
> { \
> ppc_avr_t result; \
> int ofs, half = ARRAY_SIZE(r->element) / 2; \
> int i; \
> \
> if (high) { \
> ofs = 0; \
> } else { \
> ofs = half; \
> } \
This could be also written as:
int i, half = ARRAY_SIZE(r->element) / 2; \
int ofs = (high ? 0 : half); \
to save a few lines but it depends on personal taste which one is more
readable for someone.
Regards,
BALATON Zoltan
> \
> for (i = 0; i < half; i++) { \
> result.access(i * 2 + 0) = a->access(i + ofs); \
> result.access(i * 2 + 1) = b->access(i + ofs); \
> } \
> *r = result; \
> }
>
> #define VMRG(suffix, element, access) \
> VMRG_DO(mrgl##suffix, element, access, 0) \
> VMRG_DO(mrgh##suffix, element, access, 1)
> VMRG(b, u8, VsrB)
> VMRG(h, u16, VsrH)
> VMRG(w, u32, VsrW)
> #undef VMRG_DO
> #undef VMRG
> #undef VMRG
>
>
> ATB,
>
> Mark.
>
>
On 1/29/19 9:10 PM, Mark Cave-Ayland wrote:
>> So... the point of "half" was to not replicate knowledge out to VMRG.
>> Just use
>>
>> int i, half = ARRAY_SIZE(r->element) / 2;
>> for (i = 0; i < half; i++) {
>>
>> within VMRG_DO.
>
> Okay - I was a bit confused because in your example macro signature you
> added ofs which made me think you wanted its value to be determined outside,
> but nevermind.
>
> What about the following instead? With high set as part of the macro then
> the initial assignment to ofs should be inlined accordingly at compile time:
>
> #define VMRG_DO(name, element, access, high) \
> void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
> { \
> ppc_avr_t result; \
> int ofs, half = ARRAY_SIZE(r->element) / 2; \
> int i; \
> \
> if (high) { \
> ofs = 0; \
> } else { \
> ofs = half; \
> } \
Why are you over-complicating things? I thought I'd explicitly said this
twice, but perhaps not:
Pass the symbol "half" directly to VMRG_DO:
#define VMRG(suffix, element, access) \
VMRG_DO(mrgl##suffix, element, access, 0) \
VMRG_DO(mrgh##suffix, element, access, half)
You do not need another conditional within VMRG_DO.
Also, I'm pretty sure it's the "low" merge that wants the "ofs" to be non-zero,
since all of the Vsr* symbols implement big-endian indexing.
r~
On 30/01/2019 12:51, Richard Henderson wrote:
>
> Why are you over-complicating things? I thought I'd explicitly said this
> twice, but perhaps not:
>
> Pass the symbol "half" directly to VMRG_DO:
>
> #define VMRG(suffix, element, access) \
> VMRG_DO(mrgl##suffix, element, access, 0) \
> VMRG_DO(mrgh##suffix, element, access, half)
>
> You do not need another conditional within VMRG_DO.
I'm sorry - I think I got confused by your original macro definitions which used ofs
as both the macro parameter and variable name, which is why I thought you wanted ofs
passed in by value. Based upon the above I now have this which appears to work:
#define VMRG_DO(name, element, access, sofs) \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
{ \
ppc_avr_t result; \
int ofs, half = ARRAY_SIZE(r->element) / 2; \
int i; \
\
ofs = sofs; \
for (i = 0; i < half; i++) { \
result.access(i * 2 + 0) = a->access(i + ofs); \
result.access(i * 2 + 1) = b->access(i + ofs); \
} \
*r = result; \
}
#define VMRG(suffix, element, access) \
VMRG_DO(mrgl##suffix, element, access, half) \
VMRG_DO(mrgh##suffix, element, access, 0)
VMRG(b, u8, VsrB)
VMRG(h, u16, VsrH)
VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG
Is this what you were intending? If this looks better then I'll resubmit a v5 later
this evening.
> Also, I'm pretty sure it's the "low" merge that wants the "ofs" to be non-zero,
> since all of the Vsr* symbols implement big-endian indexing.
I've just tried swapping them around as you suggested, but then all my OS X
background fills appear with corrupted colors. So to the best of my knowledge without
dumping the register contents directly, the above version with low = half and high =
0 still seems correct.
ATB,
Mark.
On 1/30/19 5:37 AM, Mark Cave-Ayland wrote:
> On 30/01/2019 12:51, Richard Henderson wrote:
>
>>
>> Why are you over-complicating things? I thought I'd explicitly said this
>> twice, but perhaps not:
>>
>> Pass the symbol "half" directly to VMRG_DO:
>>
>> #define VMRG(suffix, element, access) \
>> VMRG_DO(mrgl##suffix, element, access, 0) \
>> VMRG_DO(mrgh##suffix, element, access, half)
>>
>> You do not need another conditional within VMRG_DO.
>
> I'm sorry - I think I got confused by your original macro definitions which used ofs
> as both the macro parameter and variable name, which is why I thought you wanted ofs
> passed in by value. Based upon the above I now have this which appears to work:
>
>
> #define VMRG_DO(name, element, access, sofs) \
> void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
> { \
> ppc_avr_t result; \
> int ofs, half = ARRAY_SIZE(r->element) / 2; \
> int i; \
> \
> ofs = sofs; \
What is this variable and assignment for?
What does it accomplish?
> for (i = 0; i < half; i++) { \
> result.access(i * 2 + 0) = a->access(i + ofs); \
> result.access(i * 2 + 1) = b->access(i + ofs); \
> } \
> *r = result; \
> }
>
> #define VMRG(suffix, element, access) \
> VMRG_DO(mrgl##suffix, element, access, half) \
> VMRG_DO(mrgh##suffix, element, access, 0)
> VMRG(b, u8, VsrB)
> VMRG(h, u16, VsrH)
> VMRG(w, u32, VsrW)
> #undef VMRG_DO
> #undef VMRG
>
>
> Is this what you were intending? If this looks better then I'll resubmit a v5 later
> this evening.
>
>> Also, I'm pretty sure it's the "low" merge that wants the "ofs" to be non-zero,
>> since all of the Vsr* symbols implement big-endian indexing.
>
> I've just tried swapping them around as you suggested, but then all my OS X
> background fills appear with corrupted colors. So to the best of my knowledge without
> dumping the register contents directly, the above version with low = half and high =
> 0 still seems correct.
That is exactly what I said, still quoted above.
It is not what you had written in the previous version.
r~
On 30/01/2019 15:31, Richard Henderson wrote:
>> I'm sorry - I think I got confused by your original macro definitions which used ofs
>> as both the macro parameter and variable name, which is why I thought you wanted ofs
>> passed in by value. Based upon the above I now have this which appears to work:
>>
>>
>> #define VMRG_DO(name, element, access, sofs) \
>> void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
>> { \
>> ppc_avr_t result; \
>> int ofs, half = ARRAY_SIZE(r->element) / 2; \
>> int i; \
>> \
>> ofs = sofs; \
>
> What is this variable and assignment for?
> What does it accomplish?
Sigh. So I thought this was required because of the "int ofs" declaration above which
I was under the impression came from one of your previous emails. But in fact it was
me that introduced this earlier before I realised this morning that in fact the
preprocessor can do all the work.
>> for (i = 0; i < half; i++) { \
>> result.access(i * 2 + 0) = a->access(i + ofs); \
>> result.access(i * 2 + 1) = b->access(i + ofs); \
>> } \
>> *r = result; \
>> }
>>
>> #define VMRG(suffix, element, access) \
>> VMRG_DO(mrgl##suffix, element, access, half) \
>> VMRG_DO(mrgh##suffix, element, access, 0)
>> VMRG(b, u8, VsrB)
>> VMRG(h, u16, VsrH)
>> VMRG(w, u32, VsrW)
>> #undef VMRG_DO
>> #undef VMRG
>>
>>
>> Is this what you were intending? If this looks better then I'll resubmit a v5 later
>> this evening.
>>
>>> Also, I'm pretty sure it's the "low" merge that wants the "ofs" to be non-zero,
>>> since all of the Vsr* symbols implement big-endian indexing.
>>
>> I've just tried swapping them around as you suggested, but then all my OS X
>> background fills appear with corrupted colors. So to the best of my knowledge without
>> dumping the register contents directly, the above version with low = half and high =
>> 0 still seems correct.
>
> That is exactly what I said, still quoted above.
> It is not what you had written in the previous version.
Yes indeed, you are right that was a simple cut and paste error from where I was
testing both variants and pasted the wrong one into the email. Below is the hopefully
final version which I've just tested here against OS X:
#define VMRG_DO(name, element, access, ofs) \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
{ \
ppc_avr_t result; \
int i, half = ARRAY_SIZE(r->element) / 2; \
\
for (i = 0; i < half; i++) { \
result.access(i * 2 + 0) = a->access(i + ofs); \
result.access(i * 2 + 1) = b->access(i + ofs); \
} \
*r = result; \
}
#define VMRG(suffix, element, access) \
VMRG_DO(mrgl##suffix, element, access, half) \
VMRG_DO(mrgh##suffix, element, access, 0)
VMRG(b, u8, VsrB)
VMRG(h, u16, VsrH)
VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG
ATB,
Mark.
On Wed, 30 Jan 2019, Mark Cave-Ayland wrote:
> On 30/01/2019 12:51, Richard Henderson wrote:
>
>>
>> Why are you over-complicating things? I thought I'd explicitly said this
>> twice, but perhaps not:
>>
>> Pass the symbol "half" directly to VMRG_DO:
>>
>> #define VMRG(suffix, element, access) \
>> VMRG_DO(mrgl##suffix, element, access, 0) \
>> VMRG_DO(mrgh##suffix, element, access, half)
>>
>> You do not need another conditional within VMRG_DO.
>
> I'm sorry - I think I got confused by your original macro definitions which used ofs
> as both the macro parameter and variable name, which is why I thought you wanted ofs
> passed in by value. Based upon the above I now have this which appears to work:
>
>
> #define VMRG_DO(name, element, access, sofs) \
> void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
> { \
> ppc_avr_t result; \
> int ofs, half = ARRAY_SIZE(r->element) / 2; \
> int i; \
> \
> ofs = sofs; \
I think you don't need the ofs local variable, you can just use the macro
parameter (in paranthesis but in this case that does not matter, just
usual precaution in case expression is passed to macro instead of
constant). This is a macro so it will be literal replace, that's the trick
we were missing from Richard's suggestion I think so don't look at it as
a function parameter.
Also I wonder if you really need the result local? Can't it just access
the result via *r directly and save a copy at the end? (Although
that probably would be optimised out by the compiler anyway.)
By the way, thanks a lot for doing this and sorry that my comment caused
so much trouble but your benchmark has proven my suspicion that it would
have been two times slower with your first version.
Regards,
BALATON Zoltan
> for (i = 0; i < half; i++) { \
> result.access(i * 2 + 0) = a->access(i + ofs); \
> result.access(i * 2 + 1) = b->access(i + ofs); \
> } \
> *r = result; \
> }
>
> #define VMRG(suffix, element, access) \
> VMRG_DO(mrgl##suffix, element, access, half) \
> VMRG_DO(mrgh##suffix, element, access, 0)
> VMRG(b, u8, VsrB)
> VMRG(h, u16, VsrH)
> VMRG(w, u32, VsrW)
> #undef VMRG_DO
> #undef VMRG
>
>
> Is this what you were intending? If this looks better then I'll resubmit a v5 later
> this evening.
>
>> Also, I'm pretty sure it's the "low" merge that wants the "ofs" to be non-zero,
>> since all of the Vsr* symbols implement big-endian indexing.
>
> I've just tried swapping them around as you suggested, but then all my OS X
> background fills appear with corrupted colors. So to the best of my knowledge without
> dumping the register contents directly, the above version with low = half and high =
> 0 still seems correct.
>
>
> ATB,
>
> Mark.
>
>
On 1/30/19 8:06 AM, BALATON Zoltan wrote: > Also I wonder if you really need the result local? Can't it just access the > result via *r directly and save a copy at the end? (Although that probably > would be optimised out by the compiler anyway.) I don't think so, because of potential overlap between *r and *a or *b. r~
© 2016 - 2025 Red Hat, Inc.