Message ID | 20181223113859.3675-4-mark.cave-ayland@ilande.co.uk |
---|---|
State | New |
Headers | show |
Series | target/ppc: remove various endian hacks from int_helper.c | expand |
On 12/23/18 10:38 PM, Mark Cave-Ayland wrote: > -#define VMUL_DO(name, mul_element, prod_element, cast, evenp) \ > +#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ > void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ > { \ > int i; \ > \ > + for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ > + r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ > + (cast)b->mul_access(i); \ > + } \ > + } > + > +#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ > + void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ > + { \ > + int i; \ > + \ > + for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ > + r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ > + (cast)b->mul_access(i + 1); \ > } \ > } FWIW, for (i = odd; i < ARRAY_SIZE; i += 2) { r->pacc(i >> 1) = (cast)a->macc(i) * b->macc(i); } is sufficient to unify these two. But what you have isn't wrong. Reviewed-by: Richard Henderson <richard.henderson@linaro.org> r~
On 25/12/2018 20:11, Richard Henderson wrote: > On 12/23/18 10:38 PM, Mark Cave-Ayland wrote: >> -#define VMUL_DO(name, mul_element, prod_element, cast, evenp) \ >> +#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ >> void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ >> { \ >> int i; \ >> \ >> + for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ >> + r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ >> + (cast)b->mul_access(i); \ >> + } \ >> + } >> + >> +#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ >> + void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ >> + { \ >> + int i; \ >> + \ >> + for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ >> + r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ >> + (cast)b->mul_access(i + 1); \ >> } \ >> } > > FWIW, > > for (i = odd; i < ARRAY_SIZE; i += 2) { > r->pacc(i >> 1) = (cast)a->macc(i) * b->macc(i); > } > > is sufficient to unify these two. But what you have isn't wrong. > > Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Ah indeed that's quite neat! Thinking about it for a few days, I've decided to leave it as-is for now, since it was useful to be able to test the odd/even variants separately (as they were often used together in testing) and it's just that tiny bit easier to relate to the ISA documentation. ATB, Mark.
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index f084a706ee..dc5c9fb8d8 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -1118,33 +1118,39 @@ void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, } } -#define VMUL_DO(name, mul_element, prod_element, cast, evenp) \ +#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ { \ int i; \ \ - VECTOR_FOR_INORDER_I(i, prod_element) { \ - if (evenp) { \ - r->prod_element[i] = \ - (cast)a->mul_element[i * 2 + HI_IDX] * \ - (cast)b->mul_element[i * 2 + HI_IDX]; \ - } else { \ - r->prod_element[i] = \ - (cast)a->mul_element[i * 2 + LO_IDX] * \ - (cast)b->mul_element[i * 2 + LO_IDX]; \ - } \ + for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ + r->prod_access(i >> 1) = (cast)a->mul_access(i) * \ + (cast)b->mul_access(i); \ + } \ + } + +#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ + void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + int i; \ + \ + for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \ + r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \ + (cast)b->mul_access(i + 1); \ } \ } -#define VMUL(suffix, mul_element, prod_element, cast) \ - VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \ - VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0) -VMUL(sb, s8, s16, int16_t) -VMUL(sh, s16, s32, int32_t) -VMUL(sw, s32, s64, int64_t) -VMUL(ub, u8, u16, uint16_t) -VMUL(uh, u16, u32, uint32_t) -VMUL(uw, u32, u64, uint64_t) -#undef VMUL_DO + +#define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ + VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \ + VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast) +VMUL(sb, s8, VsrSB, VsrSH, int16_t) +VMUL(sh, s16, VsrSH, VsrSW, int32_t) +VMUL(sw, s32, VsrSW, VsrSD, int64_t) +VMUL(ub, u8, VsrB, VsrH, uint16_t) +VMUL(uh, u16, VsrH, VsrW, uint32_t) +VMUL(uw, u32, VsrW, VsrD, uint64_t) +#undef VMUL_DO_EVN +#undef VMUL_DO_ODD #undef VMUL void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
The current implementations make use of the endian-specific macros HI_IDX and LO_IDX directly to calculate array offsets. Rework the implementation to use the Vsr* macros so that these per-endian references can be removed. Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> --- target/ppc/int_helper.c | 48 +++++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 21 deletions(-)