Message ID | 20190129191746.14868-3-mark.cave-ayland@ilande.co.uk |
---|---|
State | New |
Headers | show |
Series | target/ppc: remove various endian hacks from int_helper.c | expand |
On 1/29/19 11:17 AM, Mark Cave-Ayland wrote: > +#define VMRG_DO(name, element, access, ofs) \ > void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ > { \ > ppc_avr_t result; \ > int i; \ > \ > + for (i = 0; i < ARRAY_SIZE(r->element) / 2; i++) { \ > + result.access(i * 2 + 0) = a->access(i + ofs); \ > + result.access(i * 2 + 1) = b->access(i + ofs); \ > } \ > *r = result; \ > } > + > +#define VMRG(suffix, element, access, half) \ > + VMRG_DO(mrgl##suffix, element, access, half) \ > + VMRG_DO(mrgh##suffix, element, access, 0) > +VMRG(b, u8, VsrB, 8) > +VMRG(h, u16, VsrH, 4) > +VMRG(w, u32, VsrW, 2) So... the point of "half" was to not replicate knowledge out to VMRG. Just use int i, half = ARRAY_SIZE(r->element) / 2; for (i = 0; i < half; i++) { within VMRG_DO. r~
On 29/01/2019 23:05, Richard Henderson wrote: > On 1/29/19 11:17 AM, Mark Cave-Ayland wrote: >> +#define VMRG_DO(name, element, access, ofs) \ >> void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ >> { \ >> ppc_avr_t result; \ >> int i; \ >> \ >> + for (i = 0; i < ARRAY_SIZE(r->element) / 2; i++) { \ >> + result.access(i * 2 + 0) = a->access(i + ofs); \ >> + result.access(i * 2 + 1) = b->access(i + ofs); \ >> } \ >> *r = result; \ >> } >> + >> +#define VMRG(suffix, element, access, half) \ >> + VMRG_DO(mrgl##suffix, element, access, half) \ >> + VMRG_DO(mrgh##suffix, element, access, 0) >> +VMRG(b, u8, VsrB, 8) >> +VMRG(h, u16, VsrH, 4) >> +VMRG(w, u32, VsrW, 2) > > So... the point of "half" was to not replicate knowledge out to VMRG. > Just use > > int i, half = ARRAY_SIZE(r->element) / 2; > for (i = 0; i < half; i++) { > > within VMRG_DO. Okay - I was a bit confused because in your example macro signature you added ofs which made me think you wanted its value to be determined outside, but never mind. What about the following instead? With high set as part of the macro then the initial assignment to ofs should be inlined accordingly at compile time: #define VMRG_DO(name, element, access, high) \ void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ { \ ppc_avr_t result; \ int ofs, half = ARRAY_SIZE(r->element) / 2; \ int i; \ \ if (high) { \ ofs = 0; \ } else { \ ofs = half; \ } \ \ for (i = 0; i < half; i++) { \ result.access(i * 2 + 0) = a->access(i + ofs); \ result.access(i * 2 + 1) = b->access(i + ofs); \ } \ *r = result; \ } #define VMRG(suffix, element, access) \ VMRG_DO(mrgl##suffix, element, access, 0) \ VMRG_DO(mrgh##suffix, element, access, 1) VMRG(b, u8, VsrB) VMRG(h, u16, VsrH) VMRG(w, u32, VsrW) #undef VMRG_DO #undef VMRG #undef VMRG ATB, Mark.
On Wed, 30 Jan 2019, Mark Cave-Ayland wrote: > On 29/01/2019 23:05, Richard Henderson wrote: > >> On 1/29/19 11:17 AM, Mark Cave-Ayland wrote: >>> +#define VMRG_DO(name, element, access, ofs) \ >>> void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ >>> { \ >>> ppc_avr_t result; \ >>> int i; \ >>> \ >>> + for (i = 0; i < ARRAY_SIZE(r->element) / 2; i++) { \ >>> + result.access(i * 2 + 0) = a->access(i + ofs); \ >>> + result.access(i * 2 + 1) = b->access(i + ofs); \ >>> } \ >>> *r = result; \ >>> } >>> + >>> +#define VMRG(suffix, element, access, half) \ >>> + VMRG_DO(mrgl##suffix, element, access, half) \ >>> + VMRG_DO(mrgh##suffix, element, access, 0) >>> +VMRG(b, u8, VsrB, 8) >>> +VMRG(h, u16, VsrH, 4) >>> +VMRG(w, u32, VsrW, 2) >> >> So... the point of "half" was to not replicate knowledge out to VMRG. >> Just use >> >> int i, half = ARRAY_SIZE(r->element) / 2; >> for (i = 0; i < half; i++) { >> >> within VMRG_DO. > > Okay - I was a bit confused because in your example macro signature you added ofs > which made me think you wanted its value to be determined outside, but never mind. > > What about the following instead? With high set as part of the macro then the initial > assignment to ofs should be inlined accordingly at compile time: > > > #define VMRG_DO(name, element, access, high) \ > void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ > { \ > ppc_avr_t result; \ > int ofs, half = ARRAY_SIZE(r->element) / 2; \ > int i; \ > \ > if (high) { \ > ofs = 0; \ > } else { \ > ofs = half; \ > } \ This could be also written as: int i, half = ARRAY_SIZE(r->element) / 2; \ int ofs = (high ? 0 : half); \ to save a few lines but it depends on personal taste which one is more readable for someone. Regards, BALATON Zoltan > \ > for (i = 0; i < half; i++) { \ > result.access(i * 2 + 0) = a->access(i + ofs); \ > result.access(i * 2 + 1) = b->access(i + ofs); \ > } \ > *r = result; \ > } > > #define VMRG(suffix, element, access) \ > VMRG_DO(mrgl##suffix, element, access, 0) \ > VMRG_DO(mrgh##suffix, element, access, 1) > VMRG(b, u8, VsrB) > VMRG(h, u16, VsrH) > VMRG(w, u32, VsrW) > #undef VMRG_DO > #undef VMRG > #undef VMRG > > > ATB, > > Mark. > >
On 1/29/19 9:10 PM, Mark Cave-Ayland wrote: >> So... the point of "half" was to not replicate knowledge out to VMRG. >> Just use >> >> int i, half = ARRAY_SIZE(r->element) / 2; >> for (i = 0; i < half; i++) { >> >> within VMRG_DO. > > Okay - I was a bit confused because in your example macro signature you > added ofs which made me think you wanted its value to be determined outside, > but nevermind. > > What about the following instead? With high set as part of the macro then > the initial assignment to ofs should be inlined accordingly at compile time: > > #define VMRG_DO(name, element, access, high) \ > void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ > { \ > ppc_avr_t result; \ > int ofs, half = ARRAY_SIZE(r->element) / 2; \ > int i; \ > \ > if (high) { \ > ofs = 0; \ > } else { \ > ofs = half; \ > } \ Why are you over-complicating things? I thought I'd explicitly said this twice, but perhaps not: Pass the symbol "half" directly to VMRG_DO: #define VMRG(suffix, element, access) \ VMRG_DO(mrgl##suffix, element, access, 0) \ VMRG_DO(mrgh##suffix, element, access, half) You do not need another conditional within VMRG_DO. Also, I'm pretty sure it's the "low" merge that wants the "ofs" to be non-zero, since all of the Vsr* symbols implement big-endian indexing. r~
On 30/01/2019 12:51, Richard Henderson wrote: > > Why are you over-complicating things? I thought I'd explicitly said this > twice, but perhaps not: > > Pass the symbol "half" directly to VMRG_DO: > > #define VMRG(suffix, element, access) \ > VMRG_DO(mrgl##suffix, element, access, 0) \ > VMRG_DO(mrgh##suffix, element, access, half) > > You do not need another conditional within VMRG_DO. I'm sorry - I think I got confused by your original macro definitions which used ofs as both the macro parameter and variable name, which is why I thought you wanted ofs passed in by value. Based upon the above I now have this which appears to work: #define VMRG_DO(name, element, access, sofs) \ void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ { \ ppc_avr_t result; \ int ofs, half = ARRAY_SIZE(r->element) / 2; \ int i; \ \ ofs = sofs; \ for (i = 0; i < half; i++) { \ result.access(i * 2 + 0) = a->access(i + ofs); \ result.access(i * 2 + 1) = b->access(i + ofs); \ } \ *r = result; \ } #define VMRG(suffix, element, access) \ VMRG_DO(mrgl##suffix, element, access, half) \ VMRG_DO(mrgh##suffix, element, access, 0) VMRG(b, u8, VsrB) VMRG(h, u16, VsrH) VMRG(w, u32, VsrW) #undef VMRG_DO #undef VMRG Is this what you were intending? If this looks better then I'll resubmit a v5 later this evening. > Also, I'm pretty sure it's the "low" merge that wants the "ofs" to be non-zero, > since all of the Vsr* symbols implement big-endian indexing. I've just tried swapping them around as you suggested, but then all my OS X background fills appear with corrupted colors. So to the best of my knowledge without dumping the register contents directly, the above version with low = half and high = 0 still seems correct. ATB, Mark.
On 1/30/19 5:37 AM, Mark Cave-Ayland wrote: > On 30/01/2019 12:51, Richard Henderson wrote: > >> >> Why are you over-complicating things? I thought I'd explicitly said this >> twice, but perhaps not: >> >> Pass the symbol "half" directly to VMRG_DO: >> >> #define VMRG(suffix, element, access) \ >> VMRG_DO(mrgl##suffix, element, access, 0) \ >> VMRG_DO(mrgh##suffix, element, access, half) >> >> You do not need another conditional within VMRG_DO. > > I'm sorry - I think I got confused by your original macro definitions which used ofs > as both the macro parameter and variable name, which is why I thought you wanted ofs > passed in by value. Based upon the above I now have this which appears to work: > > > #define VMRG_DO(name, element, access, sofs) \ > void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ > { \ > ppc_avr_t result; \ > int ofs, half = ARRAY_SIZE(r->element) / 2; \ > int i; \ > \ > ofs = sofs; \ What is this variable and assignment for? What does it accomplish? > for (i = 0; i < half; i++) { \ > result.access(i * 2 + 0) = a->access(i + ofs); \ > result.access(i * 2 + 1) = b->access(i + ofs); \ > } \ > *r = result; \ > } > > #define VMRG(suffix, element, access) \ > VMRG_DO(mrgl##suffix, element, access, half) \ > VMRG_DO(mrgh##suffix, element, access, 0) > VMRG(b, u8, VsrB) > VMRG(h, u16, VsrH) > VMRG(w, u32, VsrW) > #undef VMRG_DO > #undef VMRG > > > Is this what you were intending? If this looks better then I'll resubmit a v5 later > this evening. > >> Also, I'm pretty sure it's the "low" merge that wants the "ofs" to be non-zero, >> since all of the Vsr* symbols implement big-endian indexing. > > I've just tried swapping them around as you suggested, but then all my OS X > background fills appear with corrupted colors. So to the best of my knowledge without > dumping the register contents directly, the above version with low = half and high = > 0 still seems correct. That is exactly what I said, still quoted above. It is not what you had written in the previous version. r~
On Wed, 30 Jan 2019, Mark Cave-Ayland wrote: > On 30/01/2019 12:51, Richard Henderson wrote: > >> >> Why are you over-complicating things? I thought I'd explicitly said this >> twice, but perhaps not: >> >> Pass the symbol "half" directly to VMRG_DO: >> >> #define VMRG(suffix, element, access) \ >> VMRG_DO(mrgl##suffix, element, access, 0) \ >> VMRG_DO(mrgh##suffix, element, access, half) >> >> You do not need another conditional within VMRG_DO. > > I'm sorry - I think I got confused by your original macro definitions which used ofs > as both the macro parameter and variable name, which is why I thought you wanted ofs > passed in by value. Based upon the above I now have this which appears to work: > > > #define VMRG_DO(name, element, access, sofs) \ > void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ > { \ > ppc_avr_t result; \ > int ofs, half = ARRAY_SIZE(r->element) / 2; \ > int i; \ > \ > ofs = sofs; \ I think you don't need the ofs local variable, you can just use the macro parameter (in paranthesis but in this case that does not matter, just usual precaution in case expression is passed to macro instead of constant). This is a macro so it will be literal replace, that's the trick we were missing from Richard's suggestion I think so don't look at it as a function parameter. Also I wonder if you really need the result local? Can't it just access the result via *r directly and save a copy at the end? (Although that probably would be optimised out by the compiler anyway.) By the way, thanks a lot for doing this and sorry that my comment caused so much trouble but your benchmark has proven my suspicion that it would have been two times slower with your first version. Regards, BALATON Zoltan > for (i = 0; i < half; i++) { \ > result.access(i * 2 + 0) = a->access(i + ofs); \ > result.access(i * 2 + 1) = b->access(i + ofs); \ > } \ > *r = result; \ > } > > #define VMRG(suffix, element, access) \ > VMRG_DO(mrgl##suffix, element, access, half) \ > VMRG_DO(mrgh##suffix, element, access, 0) > VMRG(b, u8, VsrB) > VMRG(h, u16, VsrH) > VMRG(w, u32, VsrW) > #undef VMRG_DO > #undef VMRG > > > Is this what you were intending? If this looks better then I'll resubmit a v5 later > this evening. > >> Also, I'm pretty sure it's the "low" merge that wants the "ofs" to be non-zero, >> since all of the Vsr* symbols implement big-endian indexing. > > I've just tried swapping them around as you suggested, but then all my OS X > background fills appear with corrupted colors. So to the best of my knowledge without > dumping the register contents directly, the above version with low = half and high = > 0 still seems correct. > > > ATB, > > Mark. > >
On 1/30/19 8:06 AM, BALATON Zoltan wrote: > Also I wonder if you really need the result local? Can't it just access the > result via *r directly and save a copy at the end? (Although that probably > would be optimised out by the compiler anyway.) I don't think so, because of potential overlap between *r and *a or *b. r~
On 30/01/2019 15:31, Richard Henderson wrote: >> I'm sorry - I think I got confused by your original macro definitions which used ofs >> as both the macro parameter and variable name, which is why I thought you wanted ofs >> passed in by value. Based upon the above I now have this which appears to work: >> >> >> #define VMRG_DO(name, element, access, sofs) \ >> void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ >> { \ >> ppc_avr_t result; \ >> int ofs, half = ARRAY_SIZE(r->element) / 2; \ >> int i; \ >> \ >> ofs = sofs; \ > > What is this variable and assignment for? > What does it accomplish? Sigh. So I thought this was required because of the "int ofs" declaration above which I was under the impression came from one of your previous emails. But in fact it was me that introduced this earlier before I realised this morning that in fact the preprocessor can do all the work. >> for (i = 0; i < half; i++) { \ >> result.access(i * 2 + 0) = a->access(i + ofs); \ >> result.access(i * 2 + 1) = b->access(i + ofs); \ >> } \ >> *r = result; \ >> } >> >> #define VMRG(suffix, element, access) \ >> VMRG_DO(mrgl##suffix, element, access, half) \ >> VMRG_DO(mrgh##suffix, element, access, 0) >> VMRG(b, u8, VsrB) >> VMRG(h, u16, VsrH) >> VMRG(w, u32, VsrW) >> #undef VMRG_DO >> #undef VMRG >> >> >> Is this what you were intending? If this looks better then I'll resubmit a v5 later >> this evening. >> >>> Also, I'm pretty sure it's the "low" merge that wants the "ofs" to be non-zero, >>> since all of the Vsr* symbols implement big-endian indexing. >> >> I've just tried swapping them around as you suggested, but then all my OS X >> background fills appear with corrupted colors. So to the best of my knowledge without >> dumping the register contents directly, the above version with low = half and high = >> 0 still seems correct. > > That is exactly what I said, still quoted above. > It is not what you had written in the previous version. Yes indeed, you are right that was a simple cut and paste error from where I was testing both variants and pasted the wrong one into the email. Below is the hopefully final version which I've just tested here against OS X: #define VMRG_DO(name, element, access, ofs) \ void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ { \ ppc_avr_t result; \ int i, half = ARRAY_SIZE(r->element) / 2; \ \ for (i = 0; i < half; i++) { \ result.access(i * 2 + 0) = a->access(i + ofs); \ result.access(i * 2 + 1) = b->access(i + ofs); \ } \ *r = result; \ } #define VMRG(suffix, element, access) \ VMRG_DO(mrgl##suffix, element, access, half) \ VMRG_DO(mrgh##suffix, element, access, 0) VMRG(b, u8, VsrB) VMRG(h, u16, VsrH) VMRG(w, u32, VsrW) #undef VMRG_DO #undef VMRG ATB, Mark.
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 598731d47a..59324b2d13 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -976,43 +976,27 @@ void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) } } -#define VMRG_DO(name, element, highp) \ +#define VMRG_DO(name, element, access, ofs) \ void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ { \ ppc_avr_t result; \ int i; \ - size_t n_elems = ARRAY_SIZE(r->element); \ \ - for (i = 0; i < n_elems / 2; i++) { \ - if (highp) { \ - result.element[i*2+HI_IDX] = a->element[i]; \ - result.element[i*2+LO_IDX] = b->element[i]; \ - } else { \ - result.element[n_elems - i * 2 - (1 + HI_IDX)] = \ - b->element[n_elems - i - 1]; \ - result.element[n_elems - i * 2 - (1 + LO_IDX)] = \ - a->element[n_elems - i - 1]; \ - } \ + for (i = 0; i < ARRAY_SIZE(r->element) / 2; i++) { \ + result.access(i * 2 + 0) = a->access(i + ofs); \ + result.access(i * 2 + 1) = b->access(i + ofs); \ } \ *r = result; \ } -#if defined(HOST_WORDS_BIGENDIAN) -#define MRGHI 0 -#define MRGLO 1 -#else -#define MRGHI 1 -#define MRGLO 0 -#endif -#define VMRG(suffix, element) \ - VMRG_DO(mrgl##suffix, element, MRGHI) \ - VMRG_DO(mrgh##suffix, element, MRGLO) -VMRG(b, u8) -VMRG(h, u16) -VMRG(w, u32) + +#define VMRG(suffix, element, access, half) \ + VMRG_DO(mrgl##suffix, element, access, half) \ + VMRG_DO(mrgh##suffix, element, access, 0) +VMRG(b, u8, VsrB, 8) +VMRG(h, u16, VsrH, 4) +VMRG(w, u32, VsrW, 2) #undef VMRG_DO #undef VMRG -#undef MRGHI -#undef MRGLO void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
The current implementations make use of the endian-specific macros MRGLO/MRGHI and also reference HI_IDX and LO_IDX directly to calculate array offsets. Rework the implementation to use the Vsr* macros so that these per-endian references can be removed. Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> --- target/ppc/int_helper.c | 38 +++++++++++--------------------------- 1 file changed, 11 insertions(+), 27 deletions(-)